view mageck_gsea.xml @ 3:f259c29b3832 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/mageck commit 95daf3f97e89989bae687e64cae8b129b3e2b7af
author iuc
date Thu, 19 Apr 2018 05:34:15 -0400
parents d95eae71878e
children b946929d31c4
line wrap: on
line source

<?xml version="1.0"?>
<tool id="mageck_gsea" name="MAGeCK GSEA" version="@VERSION@" >
    <description>- a fast implementation of Gene Set Enrichment Analysis</description>
    <macros>
        <import>mageck_macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <version_command>mageckGSEA --version</version_command>
    <command detect_errors="exit_code"><![CDATA[
## Assemble the mageckGSEA command line from the tool parameters.

mageckGSEA

-r '$rank_file'
-g '$gmt_file'

## score_column is declared optional: only emit -c when a value was supplied,
## otherwise a bare "-c" with no argument would corrupt the command line.
#if str( $score_column ) != "":
    -c $score_column
#end if

## Restrict the analysis to one pathway only when a name was entered.
#if str( $pathway_name ) != "":
    -n '$pathway_name'
#end if

## Boolean switches from the Advanced Options section.
#if $adv.reverse_value:
    -e
#end if
#if $adv.sort_byp:
    -s
#end if

## perm_time is optional as well, so -p is guarded the same way as -c.
#if str( $adv.perm_time ) != "":
    -p $adv.perm_time
#end if

## Fixed file name in the job working directory.
-o output_file

    ]]></command>
    <inputs>
        <!-- Input datasets: the gene ranking table and the pathway definitions. -->
        <param name="rank_file" argument="--rank_file" type="data" format="tabular" label="Gene Ranking file" help="A gene ranking file such as that generated by mageck test. The first column of the rank file must contain gene identifiers, of the same type used in the GMT file" />
        <!-- The argument is spelled with an underscore, like every other option of this tool. -->
        <param name="gmt_file" argument="--gmt_file" type="data" format="tabular" label="Pathway GMT file" help="The pathway file in GMT format. See Help below for more information" />
        <!-- Column numbering starts at 0 (see help text), so negative values are rejected. -->
        <param argument="--score_column" type="integer" min="0" value="0" optional="true" label="Gene Ranking column number" help="The column number containing gene scores in the gene ranking file. If you just want to use the ranking of the gene (located at the 1st column), use 0. The column number starts from 0. Default: 0" />
        <!-- Left empty, no pathway name is passed on the command line. -->
        <param argument="--pathway_name" type="text" value="" label="Name of the pathway to be tested" help=" If not found, will test all pathways" />
        <section name="adv" title="Advanced Options">
            <!-- The command template tests these booleans for truthiness and emits its own flags. -->
            <param argument="--reverse_value" type="boolean" truevalue="--reverse_value" falsevalue="" checked="false" optional="true" label="Reverse the order of the gene" help="Default: No" />
            <param argument="--sort_byp" type="boolean" truevalue="--sort_byp"  falsevalue="" checked="true" optional="true" label="Sort the pathways by p value" help="Default: Yes" />
            <param argument="--perm_time" type="integer" min="0" value="1000" optional="true" label="Number of permutations" help="Default: 1000" />
        </section>
    </inputs>
    <outputs>
        <!-- Single tabular result, picked up from the file "output_file" in the job working directory. -->
        <data
            name="output_file"
            format="tabular"
            from_work_dir="output_file"
            label="${tool.name} on ${on_string}: mageckGSEA Output"/>
    </outputs>
    <tests>
        <!-- Ensure MAGeCK's demo1 test works -->
        <test>
            <param name="rank_file" value="out.test.gene_summary.txt" ftype="tabular"/>
            <param name="gmt_file" value="in.mageckQC.gmt" ftype="tabular"/>
            <output name="output_file" value="out.mageckGSEA.txt"/>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

mageckGSEA_ is a fast implementation of Gene Set Enrichment Analysis (GSEA). It's used by MAGeCK_ for quality controls and pathway enrichment tests.

.. _mageckGSEA: https://sourceforge.net/p/mageck/wiki/usage/#mageckgsea
.. _MAGeCK: http://liulab.dfci.harvard.edu/Mageck/

-----

**Inputs**

**Gene Ranking file**

mageckGSEA requires a gene ranking file as input. A gene ranking file can be produced using **mageck test**. An example of the gene ranking file is as follows:

======= ======= ============= =============== =========== ============ ================= =========== ============= =============== =========== ============ ================= ===========
**id**  **num** **neg|score** **neg|p-value** **neg|fdr** **neg|rank** **neg|goodsgrna** **neg|lfc** **pos|score** **pos|p-value** **pos|fdr** **pos|rank** **pos|goodsgrna** **pos|lfc**
------- ------- ------------- --------------- ----------- ------------ ----------------- ----------- ------------- --------------- ----------- ------------ ----------------- -----------
ESPL1   12      6.4327e-10    7.558e-06       7.9e-05      1           11                -2.35       0.99725       0.99981         0.999992    615          0                 -0.07
RPL18   12      6.4671e-10    7.558e-06       7.9e-05      2           11                -2.12       0.99799       0.99989         0.999992    620          0                 -0.32
CDK1    12      2.6439e-09    7.558e-06       7.9e-05      3           12                -1.93       1.0           0.99999         0.999992    655          0                 -0.12
======= ======= ============= =============== =========== ============ ================= =========== ============= =============== =========== ============ ================= ===========


**Pathway file**

mageckGSEA also requires a pathway file in GMT format. The GMT (Gene Matrix Transposed) file format is a tab delimited file format that describes gene sets and is consistent with the `GMT file in Gene Set Enrichment Analysis (GSEA)`_. In the GMT format, each row represents a gene set, with the first column containing the gene set name, and the second column containing a description for the gene set, followed by the names or ids of the genes in the gene set. You can download different GMT pathway files directly from the `GSEA MSigDB database`_.  An example of the GMT format is as follows:

=================  =============================================================  =======================
**Gene Set Name**  **Description**                                                **Genes**
-----------------  -------------------------------------------------------------  -----------------------
KEGG_RIBOSOME      http://www.broadinstitute.org/gsea/msigdb/cards/KEGG_RIBOSOME  RPL35   RPL23   RPL3...
=================  =============================================================  =======================

-----

**Outputs**

**mageckGSEA output file**

An example of the output file is as follows:

============= ======== ====== ===== ================= ======= =========== ======== =======
**Pathway**   **Size** **ES** **p** **p_permutation** **FDR** **Ranking** **Hits** **LFC**
------------- -------- ------ ----- ----------------- ------- ----------- -------- -------
KEGG_RIBOSOME    88    1      0     0                 0       1           0        0
============= ======== ====== ===== ================= ======= =========== ======== =======

By default, pathways in the output are sorted by the p-value (**p**) field.

.. _`GMT file in Gene Set Enrichment Analysis (GSEA)`: http://software.broadinstitute.org/cancer/software/gsea/wiki/index.php/Data_formats#GMT:_Gene_Matrix_Transposed_file_format_.28.2A.gmt.29
.. _`GSEA MSigDB database`: http://software.broadinstitute.org/gsea/login.jsp

    ]]></help>
      <expand macro="citations" />
</tool>