view cluster_profiler.xml @ 13:f6107b8ae8f8 draft default tip

"planemo upload commit be9070db4a3b178ab45ecd9c9c3ab369240f7617-dirty"
author proteore
date Fri, 09 Apr 2021 14:39:05 +0000
parents 85f039f53414
children
line wrap: on
line source

<tool id="cluter_profiler" name="Classification and enrichment analysis" version="2021.04.08">
    <description>(Human, Mouse, Rat)[clusterProfiler]</description>
    <!-- Packages the tool depends on, each pinned to an exact version.
         Element order is left untouched. -->
    <requirements>
        <!-- R runtime; the command section launches the script with Rscript -->
        <requirement type="package" version="4.0.3">r-base</requirement>
        <!-- Organism annotation databases; their names correspond to the values of the "species" select -->
        <requirement type="package" version="3.12.0">bioconductor-org.hs.eg.db</requirement>
        <requirement type="package" version="3.12.0">bioconductor-org.mm.eg.db</requirement>
        <requirement type="package" version="3.12.0">bioconductor-org.rn.eg.db</requirement>
        <!-- Bioconductor libraries; presumably loaded by GO-enrich.R (script not visible here) -->
        <requirement type="package" version="1.52.0">bioconductor-annotationdbi</requirement>
        <requirement type="package" version="3.16.0">bioconductor-dose</requirement>
        <requirement type="package" version="3.18.1">bioconductor-clusterprofiler</requirement>
    </requirements>
    <!--
        Cheetah template that builds the Rscript call to GO-enrich.R from the form values.
        Every substituted value is wrapped in single quotes: the free-text "txt" parameters
        accept all printable characters except the single quote (rewritten to __sq__ by
        their sanitizers), so single quoting is the only shell quoting that such text
        cannot break out of. The script's standard output is redirected to the "log" output.
    -->
    <command detect_errors="exit_code"><![CDATA[
        Rscript '$__tool_directory__/GO-enrich.R'
        ## Identifiers to analyse: pasted text, or one column of a dataset
        #if $input.ids == "text"
            --input_type='text'
            --input='$input.txt'
        #else
            --input_type='file'
            --input='$input.file'
            --ncol='$input.ncol'
            --header='$input.header'
        #end if

        --id_type='$idti.idtypein'

        --species='$species'

        ## GO classification; the level is only passed when the analysis is enabled
        #if $ggo.go_represent == "true"
            --go_represent='true'
            --level='$ggo.level'
        #else
            --go_represent='false'
        #end if

        ## GO enrichment: cut-offs, plot types and optional user-defined background
        #if $ego.go_enrich == "true"
            --plot='$ego.plot'
            --go_enrich='true'
            --pval_cutoff='$ego.pval'
            --qval_cutoff='$ego.qval'
            #if $ego.universe.universe_option == "true"
                #if $ego.universe.universe_input.universe_ids == "text"
                    --universe_type='text'
                    --universe='$ego.universe.universe_input.txt'
                #else
                    --universe_type='file'
                    --universe='$ego.universe.universe_input.file'
                    --uncol='$ego.universe.universe_input.ncol'
                    --uheader='$ego.universe.universe_input.header'
                #end if
                --universe_id_type='$ego.universe.universe_idti.universe_idtypein'
            #end if
        #else
            --go_enrich='false'
        #end if

        --onto_opt='$ontology' > '$log'
    ]]></command>
    <inputs>
        <!-- Source of the identifiers to analyse: pasted text or one column of a dataset -->
        <conditional name="input">
            <param name="ids" type="select" label="Enter your IDs (UniProt Accession number or Gene ID)" help="Copy/paste or from a file (e.g. table)">
                <option value="text">Copy/paste your IDs</option>
                <option value="file" selected="true">Input file containing your IDs</option>
            </param>
            <when value="text">
                <param name="txt" type="text" label="Copy/paste your identifiers" help='IDs must be separated by spaces into the form field, for example: P31946 P62258'>
                    <!-- Accept every printable character except the single quote, which is rewritten to __sq__ -->
                    <sanitizer>
                        <valid initial="string.printable">
                            <remove value="&apos;"/>
                        </valid>
                        <mapping initial="none">
                            <add source="&apos;" target="__sq__"/>
                        </mapping>
                    </sanitizer>
                </param>
            </when>
            <when value="file">
                <param name="file" type="data" format="txt,tabular" label="Choose a file that contains your list of IDs"/>
                <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?"/>
                <param name="ncol" type="text" value="c1" label="Column number of IDs" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on'>
                    <!-- Anchored at both ends so that only an optional "c" followed by digits is accepted;
                         without the end anchor, trailing characters after the digits would pass validation -->
                    <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">^c?[0-9]+$</validator>
                </param>
            </when>
        </conditional>
        <!-- Kind of identifier supplied in the input list; neither choice adds further parameters -->
        <conditional name="idti">
            <param name="idtypein" type="select" label="Select type/source of IDs">
                <option value="Uniprot">UniProt accession number (e.g. P31946)</option>
                <option value="Entrez">Entrez Gene ID (e.g. 4151)</option>
            </param>
            <when value="Uniprot"/>
            <when value="Entrez"/>
        </conditional>
        <!-- Organism; each value is the name of the annotation package passed to the script -->
        <param name="species" type="select" label="Species">
            <option value="org.Hs.eg.db">Human (Homo sapiens) </option>
            <option value="org.Mm.eg.db">Mouse (Mus musculus) </option>
            <option value="org.Rn.eg.db">Rat (Rattus norvegicus)</option>
        </param>
        <!-- GO categories to analyse; required, several may be ticked -->
        <param name="ontology" type="select" multiple="true" display="checkboxes" optional="false" label="Select GO terms category">
            <option value="CC">Cellular Component</option>
            <option value="BP">Biological Process</option>
            <option value="MF">Molecular Function</option>
        </param>
        <!-- GO classification analysis; the ontology level is only asked for when it is enabled -->
        <conditional name="ggo">
            <param name="go_represent" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Perform GO categories representation analysis?"/>
            <when value="true">
                <param name="level" type="select" label="Ontology level (the higher this number, the deeper the GO level, up to 3)">
                    <option value="1">1</option>
                    <!-- Level 2 is the default -->
                    <option value="2" selected="true">2</option>
                    <option value="3">3</option>
                </param>
            </when>
            <when value="false"/>
        </conditional>
        <!-- GO enrichment analysis: cut-offs, optional user-defined background and plot types -->
        <conditional name="ego">
            <param name="go_enrich" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Perform GO categories enrichment analysis?"/>
            <when value="true">
                <param name="pval" type="float" value="0.01" label="P-value cut off"/>
                <param name="qval" type="float" value="0.05" label="Q-value cut off"/>
                <!-- Background ("universe") identifiers; off by default -->
                <conditional name="universe">
                    <param name="universe_option" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Define your own background IDs?"/>
                    <when value="true">
                        <!-- Background identifiers come from pasted text or from one column of a dataset -->
                        <conditional name="universe_input">
                            <param name="universe_ids" type="select" label="Enter your background IDs (UniProt Accession number or Entrez Gene ID)" help="Copy/paste or from a file (e.g. table)">
                                <option value="text">Copy/paste your background IDs</option>
                                <option value="file" selected="true">Input file containing your background IDs</option>
                            </param>
                            <when value="text">
                                <param name="txt" type="text" label="Copy/paste your background IDs" help='IDs must be separated by spaces into the form field, for example: P31946 P62258'>
                                    <!-- Accept every printable character except the single quote, which is rewritten to __sq__ -->
                                    <sanitizer>
                                        <valid initial="string.printable">
                                            <remove value="&apos;"/>
                                        </valid>
                                        <mapping initial="none">
                                            <add source="&apos;" target="__sq__"/>
                                        </mapping>
                                    </sanitizer>
                                </param>
                            </when>
                            <when value="file">
                                <param name="file" type="data" format="txt,tabular" label="Select file that contains your background IDs list"/>
                                <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?"/>
                                <param name="ncol" type="text" value="c1" label="Column number of IDs" help='For example, fill in "c1" if it is the first column, "c2" if it is the second column and so on'>
                                    <!-- Anchored at both ends so that only an optional "c" followed by digits is accepted;
                                         without the end anchor, trailing characters after the digits would pass validation -->
                                    <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">^c?[0-9]+$</validator>
                                </param>
                            </when>
                        </conditional>
                        <!-- Identifier type of the background list; neither choice adds further parameters -->
                        <conditional name="universe_idti">
                            <param name="universe_idtypein" type="select" label="Select type of IDs of your background">
                                <option value="Uniprot">UniProt Accession number</option>
                                <option value="Entrez">Entrez Gene ID</option>
                            </param>
                            <when value="Uniprot"/>
                            <when value="Entrez"/>
                        </conditional>
                    </when>
                    <when value="false"/>
                </conditional>
                <!-- Plot types for the enrichment results; required, dot-plot is preselected -->
                <param name="plot" type="select" display="checkboxes" multiple="true" label="Graphical display" optional="false">
                    <option value="dotplot" selected="true">dot-plot</option>
                    <option value="barplot">bar-plot</option>
                </param>
            </when>
            <when value="false"/>
        </conditional>
    </inputs>
    <!-- One dataset that receives the redirected output of the command, plus two
         collections filled by matching file names after the job has run -->
    <outputs>
        <data name="log" label="Cluster profiler" format="tsv"/>
        <!-- Result tables: files whose name matches ".+\.tsv" -->
        <collection name="text_output" type="list" label="clusterProfiler text files">
            <discover_datasets ext="tsv" pattern="(?P&lt;designation&gt;.+\.tsv)"/>
        </collection>
        <!-- Figures: files whose name matches ".+plot", registered as png -->
        <collection name="graph_output" type="list" label="clusterProfiler diagram outputs">
            <discover_datasets ext="png" pattern="(?P&lt;designation&gt;.+plot)"/>
        </collection>
    </outputs>
    <tests>
        <!-- Single end-to-end run: UniProt accessions read from column c1 of a file, human
             annotation, GO classification at level 3 and GO enrichment on all three ontologies -->
        <test>
            <conditional name="input">
                <param name="ids" value="file"/>
                <param name="file" value="Lacombe_et_al_2017_OK.txt"/>
                <param name="header" value="true"/>
                <param name="ncol" value="c1"/>
            </conditional>
            <conditional name="idti">
                <param name="idtypein" value="Uniprot"/>
            </conditional>
            <param name="species" value="org.Hs.eg.db"/>
            <conditional name="ggo">
                <param name="go_represent" value="true"/>
                <param name="level" value="3"/>
            </conditional>
            <conditional name="ego">
                <param name="go_enrich" value="true"/>
                <!-- NOTE(review): in the inputs section, universe_input and universe_idti live inside
                     the "universe" conditional and only apply when universe_option is true. This test
                     neither nests them that way nor sets universe_option, so confirm whether the
                     background file is really used before relying on this test for that path. -->
                <conditional name="universe_input">
                    <param name="universe_ids" value="file"/>
                    <param name="file" value="background_ids.txt"/>
                    <param name="header" value="true"/>
                    <param name="ncol" value="c7"/>
                </conditional>
                <conditional name="universe_idti">
                    <param name="universe_idtypein" value="Uniprot"/>
                </conditional>
            </conditional>
            <param name="ontology" value="CC,BP,MF"/>
            <param name="plot" value="dotplot,barplot"/>
            <output name="log" file="log.txt"/>
            <output_collection name="text_output">
                <element name="cluster_profiler_GGO_CC.tsv" file="cluster_profiler_GGO_CC.csv" ftype="tsv"/>
                <!-- The GGO_BP comparison was already disabled; the reason is not recorded here -->
                <!-- <element name="cluster_profiler_GGO_BP.tsv" file="cluster_profiler_GGO_BP.csv" ftype="tsv"/> -->
                <element name="cluster_profiler_GGO_MF.tsv" file="cluster_profiler_GGO_MF.csv" ftype="tsv"/>
                <element name="cluster_profiler_EGO_CC.tsv" file="cluster_profiler_EGO_CC.csv" ftype="tsv"/>
                <!-- Compared with the EGO reference like its siblings; it previously pointed at the
                     GGO_BP file. NOTE(review): assumes cluster_profiler_EGO_BP.csv exists in test-data -->
                <element name="cluster_profiler_EGO_BP.tsv" file="cluster_profiler_EGO_BP.csv" ftype="tsv"/>
                <element name="cluster_profiler_EGO_MF.tsv" file="cluster_profiler_EGO_MF.csv" ftype="tsv"/>
            </output_collection>
            <output_collection name="graph_output">
                <element name="GGO_CC_bar-plot" file="GGO_CC_bar-plot" ftype="png"/>
                <element name="GGO_BP_bar-plot" file="GGO_BP_bar-plot" ftype="png"/>
                <element name="GGO_MF_bar-plot" file="GGO_MF_bar-plot" ftype="png"/>
                <element name="EGO_CC_bar-plot" file="EGO_CC_bar-plot" ftype="png"/>
                <element name="EGO_BP_bar-plot" file="EGO_BP_bar-plot" ftype="png"/>
                <element name="EGO_CC_dot-plot" file="EGO_CC_dot-plot" ftype="png"/>
                <element name="EGO_BP_dot-plot" file="EGO_BP_dot-plot" ftype="png"/>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[

**Description**

This tool is based on the R package clusterProfiler and performs GO term classification and enrichment analyses on gene/protein sets (e.g. given a set of genes that are up-regulated under certain conditions, an enrichment analysis finds which GO terms are over-represented (or under-represented) using annotations for that gene/protein set).

Given a list of IDs, this tool:

(i)  performs gene classification based on GO distribution at a specific level,

(ii) calculates GO categories enrichment (over- or under-representation) for the IDs of the input list, compared to a background. The background can be either the whole organism or a user-defined list. In the latter case, we recommend using the "Build tissue-specific expression dataset" ProteoRE tool to create this list according to your needs.

-----

**Input**

Two modes are allowed: either by supplying a tabular file (.csv, .tsv, .txt, .tab) including your IDs (identifiers) or by copy/pasting your IDs (separated by a space).

"Select type/source of IDs": only entrez gene ID (e.g. 4151, 7412) or Uniprot accession number (e.g. P31946) are allowed. If your list is not in this form, please use the ID_Converter tool of ProteoRE.

.. class:: warningmark

In copy/paste mode, the number of IDs considered in input is limited to 5000.

-----

**Parameters**

"Species": the three supported species are Homo sapiens, Mus musculus and Rattus norvegicus

"Perform GO categories representation analysis?": classify genes based on their projection at a specific level of the GO corpus (see parameter below), and provides functions (set to "Yes")

"Ontology level (the higher this number, the deeper the GO level)": correspond to the level of GO hierarchy (from 1 to 3) (set to level "2" by default). In general the higher the level, the more semantically specific the term is.

"Perform GO categories enrichment analysis?": calculate enrichment test for GO terms based on hypergeometric distribution (set to "Yes")

"P-value cut off": P-value threshold value for the declaration of significance (default is < 0.01)

"Q-value cut off": to prevent high false discovery rate (FDR) in multiple testing, Q-values (adjusted P-values) are estimated for FDR control. (default is < 0.05)

"Define your own background IDs?": by default the whole genome/proteome is used as a reference background to compute the enrichment. As this reference set should normally only include genes/proteins that were monitored during your analysis, this option allows to provide your own background; this could be for instance, the total number of genes/proteins expressed in the tissue/sample under study.

If you want to use your own background, click on the "Yes" button. Your gene/protein set must be a list of Entrez gene ID or Uniprot accession number (otherwise, use the ID-Converter tool of ProteoRE). Select the file containing your list of ID (as background), then specify the column number which contains IDs and the type of IDs (gene Entrez or Uniprot Accession number) as requested.

Of note: for Human species, you can build your own background by using the "Build tissue-specific expression dataset" tool of ProteoRE.

-----

**Output**

Diagram output: graphical output in the form of bar-plot or dot-plot (png format), one figure for each GO category.

Text tables: with the following information: GO category description (e.g. BP.Description), GO term identifier (e.g. BP.GOID) and GO term frequency (e.g. BP.Frequency). One table and one graphic representing the repartition and/or enrichment of GO categories will be produced for each GO category.

-----

**Authors**

G Yu, LG Wang, Y Han, QY He. clusterProfiler: an R package for comparing biological themes among gene clusters.
OMICS: A Journal of Integrative Biology 2012, 16(5):284-287. doi: `10.1089/omi.2011.0118 <http://dx.doi.org/10.1089/omi.2011.0118>`_

User manual / Documentation of the clusterProfiler R package (functions and parameters):
https://bioconductor.org/packages/3.7/bioc/vignettes/clusterProfiler/inst/doc/clusterProfiler.html

-----

.. class:: infomark

Bioconductor Packages used:

    - bioconductor-org.hs.eg.db v3.12.0
    - bioconductor-org.mm.eg.db v3.12.0
    - bioconductor-org.rn.eg.db v3.12.0
    - bioconductor-annotationdbi v1.52.0
    - bioconductor-dose v3.16.0
    - bioconductor-clusterprofiler v3.18.1

.. class:: infomark

**Galaxy integration**

Lien Nguyen, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR

Christophe Caron, Valentin Loux - INRAE, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR

This work has been partially funded through the French National Agency for Research (ANR) IFB project.

Help: contact@proteore.org for any questions or concerns about this tool.


    ]]></help>
    <!-- clusterProfiler publication, same DOI as quoted in the help text -->
    <citations>
        <citation type="doi">10.1089/omi.2011.0118</citation>
    </citations>
</tool>