view mannwhitney_de.xml @ 5:a7eb04240b8b draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_mannwhitney_de commit 9286176209be97cd4b972c034345cf060bd7783e
author artbio
date Thu, 07 Nov 2024 21:37:11 +0000
parents 6916ac5a9ef0
children
line wrap: on
line source

<tool id="mannwhitney_de" name="Perform a differential analysis" version="4.1.3+galaxy1">
    <!-- Short tagline displayed next to the tool name. -->
    <description>using a Mann-Whitney test</description>
    <!-- Cross-reference to the matching bio.tools entry. -->
    <xrefs>
        <xref type="bio.tools">galaxy_single_cell_suite</xref>
    </xrefs>
    <!-- Packages that must be resolved before the job can run. -->
    <requirements>
        <requirement version="1.7.1" type="package">r-optparse</requirement>
    </requirements>
    <!-- Every exit code of 1 or above is reported as a fatal tool error. -->
    <stdio>
        <exit_code level="fatal" range="1:" description="Tool exception"/>
    </stdio>
    <command detect_errors="exit_code"><![CDATA[
        ## Run the R script shipped with the tool. The script path and all path/text
        ## values are single-quoted so that a value containing spaces is still
        ## handed to Rscript as one argument.
        Rscript '$__tool_directory__/MannWhitney_DE.R'
            --input '$input'
            ## The "sep" select only offers the values tab and comma, so it is passed through unchanged.
            --sep '$sep'
            --colnames '$colnames'
            --factor1 '$factor1'
            --factor2 '$factor2'
            --comparison_factor_file '$comparison_factor_file'
            --fdr '$fdr'
            ## Boolean flag: renders as "--log" when checked and as an empty string otherwise.
            $log
            --output '$output'
    ]]></command>
    <inputs>
        <!-- Expression table; "sep" and "colnames" below describe how it must be parsed. -->
        <param name="input" type="data" format="txt,tabular" label="Expression data" help="a csv or tsv table file" />
        <param name="sep" type="select" label="Indicate column separator">
            <option value="tab" selected="true">Tabs</option>
            <option value="comma">Comma</option>
        </param>
        <param name="colnames" type="select" label="First row contains column names">
            <option value="TRUE" selected="true">Yes</option>
            <option value="FALSE">No</option>
        </param>
        <!-- Help text kept on a single line: line breaks inside an attribute value are
             turned into spaces by the XML parser. -->
        <param name="comparison_factor_file" type="data" format="tabular" label="Comparison factor table"
               help="A tsv table file with two columns: cell identifiers and a column that splits cells into two categories. Other columns are ignored." />
        <!-- The two factor levels are passed to the script as-is; an empty value cannot
             designate a group of cells, so it is rejected at form validation. -->
        <param name="factor1" type="text" label="Condition-1. The first level (value) that the comparison factor can take." help="typical values could be 'LOW', 'wt', 't1', etc.">
            <validator type="empty_field" message="Please provide the first level of the comparison factor"/>
        </param>
        <param name="factor2" type="text" label="Condition-2. The second level (value) that the comparison factor can take." help="typical values could be 'HIGH', 'mutant', 't2', etc.">
            <validator type="empty_field" message="Please provide the second level of the comparison factor"/>
        </param>
        <!-- An FDR is a proportion, hence the [0, 1] bounds. -->
        <param name="fdr" type="float" value="0.01" min="0" max="1" label="FDR threshold"
               help="Reject H0 of no differential expression if the adjusted p-value (Benjamini-Hochberg correction) is lower than the FDR cut-off."/>
        <!-- Rendered on the command line as "--log" when checked, as nothing otherwise. -->
        <param name="log" type="boolean" checked="false" label="Expression data are log-transformed" truevalue="--log" falsevalue=""/>
    </inputs>
    <outputs>
        <!-- Single tabular result table; its history label embeds on_string. -->
        <data name="output"
              format="tabular"
              label="Results of Mann-Whitney differential analysis of ${on_string}"/>
    </outputs>
    <tests>
        <!-- Tab-separated expression table combined with factor_2col.tsv;
             the tool declares a single output, which is asserted explicitly. -->
        <test expect_num_outputs="1">
            <param name="input" value="input.tsv" ftype="tabular"/>
            <param name="sep" value="tab" />
            <param name="colnames" value="TRUE"/>
            <param name="comparison_factor_file" value="factor_2col.tsv" ftype="tabular"/>
            <param name="factor1" value="LOW"/>
            <param name="factor2" value="HIGH"/>
            <param name="fdr" value="0.05"/>
            <param name="log" value="true"/>
            <output name="output" file="result.tsv" ftype="tabular"/>
        </test>
        <!-- Comma-separated expression table combined with factor_3col.tsv
             (presumably a factor table carrying an extra, ignored column). -->
        <test expect_num_outputs="1">
            <param name="input" value="input.csv" ftype="txt"/>
            <param name="sep" value="comma" />
            <param name="colnames" value="TRUE"/>
            <param name="comparison_factor_file" value="factor_3col.tsv" ftype="tabular"/>
            <param name="factor1" value="LOW"/>
            <param name="factor2" value="HIGH"/>
            <param name="fdr" value="0.05"/>
            <param name="log" value="true"/>
            <output name="output" file="result_from_csv.tsv" ftype="tabular"/>
        </test>
    </tests>
    <help>

**What it does**

The tool takes a table of gene expression values (e.g. log2(CPM+1), etc.) from single cell RNAseq libraries (columns)

and a metadata file that contains at least two columns :
    * Cell identifiers
    * A column that splits the cells into two groups (the two levels of a comparison factor). It must be a column that contains only two distinct values (the factor levels).

For each gene (rows in expression data file), this script performs a 2-sided Mann-Whitney test between
the two groups of cells (high/low, mutant/wild type) and then adjusts the returned p-values by using the
Benjamini-Hochberg (BH) correction. A False Discovery Rate (FDR) threshold is used to determine if gene expression
can be considered as significantly deviating from the H0 hypothesis of no-differential-expression (p-adjust below FDR cut-off) or not (p-adjust above the FDR cut-off).

.. class:: warningmark

**Comparison plan**

Note that the log2 fold changes computed by the tool are based on the comparison of condition-2 (level-2) versus condition-1 (level-1), i.e.
the tool returns the log2FC in condition-2 **relative** to condition-1.

**Output**

The tool returns a result table. For each row (gene):

==================== ======================================================================================================
              Column Description
-------------------- ------------------------------------------------------------------------------------------------------
                mean mean expression across all cells
                  SD standard deviation of its expression across all cells
            variance variance of expression across all cells
Percentage_detection (number of cells where the gene is detected (expression value superior to 0) / number of cells) * 100
        mean_factor2 mean expression across cells of the second group (of log transformed expression values)
        mean_factor1 mean expression across cells of the first group (of log transformed expression values)
         fold_change mean_factor2 - mean_factor1 (difference of log)
           statistic W statistic of Mann-Whitney test
             p.value p-value of Mann-Whitney test
            p.adjust p-values adjusted from the BH correction
         Significant if p-adjust superior to FDR cut-off then the value is *NS*, if inferior then it's either *UP* or *DOWN* depending on the sign of fold change 
==================== ======================================================================================================

    </help>
    <citations>
        <!-- Inline BibTeX entry citing the R language and environment. -->
        <citation type="bibtex">
        @Manual{,
             title = {R: A Language and Environment for Statistical Computing},
             author = {{R Core Team}},
             organization = {R Foundation for Statistical Computing},
             address = {Vienna, Austria},
             year = {2014},
             url = {http://www.R-project.org/},
        }
        </citation>
    </citations>
</tool>