view topGO.xml @ 16:7f1ce70f0f09 draft default tip

"planemo upload commit 29a04769c0546be759c38bbcc157c8bc09ec1115-dirty"
author proteore
date Mon, 17 May 2021 14:40:03 +0000
parents 159596ec807d
children
line wrap: on
line source

<tool id="topGO" name="Enrichment analysis" version="2021.04.12">
    <description>(Human, Mouse, Rat)[topGO]</description>
    <!-- Runtime dependencies: R and ggplot2, plus the Bioconductor packages for topGO, GO.db, graph, AnnotationDbi and the three organism annotation databases. -->
    <!-- NOTE(review): R and r-ggplot2 carry no version attribute while every Bioconductor package is pinned; consider pinning them so the environment is reproducible. -->
    <requirements>
        <requirement type="package">R</requirement>
        <requirement type="package">r-ggplot2</requirement>
        <requirement type="package" version="3.8.2">bioconductor-org.hs.eg.db</requirement>
        <requirement type="package" version="3.8.2">bioconductor-org.mm.eg.db</requirement>
        <requirement type="package" version="3.8.2">bioconductor-org.rn.eg.db</requirement>
        <requirement type="package" version="1.62.0">bioconductor-graph</requirement>
        <requirement type="package" version="1.46.0">bioconductor-annotationdbi</requirement>
        <requirement type="package" version="3.8.2">bioconductor-go.db</requirement>
        <requirement type="package" version="2.36.0">bioconductor-topgo</requirement>
    </requirements>
    <!-- Any exit status of 1 or above from the wrapped command is treated as a job failure. -->
    <stdio>
        <exit_code range="1:" level="fatal"/>
    </stdio>
    <!--
      Builds the Rscript command line for topGO_enrichment.R.
      Every interpolated value is wrapped in single quotes: the text sanitizers on the
      pasted ID lists only neutralise the single quote character, so a double-quoted
      argument would let characters such as the double quote, dollar sign or backtick
      reach the shell. The script path is quoted as well so that a tool directory
      containing spaces does not split the argument.
      File-specific options (column, header) are only emitted for the "file" input mode,
      and the background options only when a custom background is enabled.
    -->
    <command><![CDATA[

  Rscript --vanilla '$__tool_directory__/topGO_enrichment.R'
  --inputtype='$inputtype.filetype'
  --input='$inputtype.genelist'

  #if $inputtype.filetype == "file"
    --column='$inputtype.column'
    --header='$inputtype.header'
  #end if

  --ontology='$ontocat'
  --option='$option'
  --threshold='$threshold'
  --correction='$correction'
  --textoutput='true'
  --plot='$plot'
  --geneuniverse='$geneuniverse'
  --background='$background_genes.background'

  #if $background_genes.background == "true"
    --background_genes='$background_genes.inputtype.genelist'
    --background_input_type='$background_genes.inputtype.filetype'
    #if $background_genes.inputtype.filetype == "file"
      --background_header='$background_genes.inputtype.header'
      --background_column='$background_genes.inputtype.column'
    #end if
  #end if

    ]]></command>

    <inputs>
        <!-- Query gene list: a column of a dataset or a pasted list of Ensembl gene IDs. -->
        <conditional name="inputtype">
            <param name="filetype" type="select" label="Enter your IDs (Ensembl Gene Id)" help="Copy/paste or from a file">
                <option value="file" selected="true">Input file containing your IDs</option>
                <option value="copy_paste">Copy/paste your list of IDs</option>
            </param>
            <when value="copy_paste">
                <param name="genelist" type="text" label="Enter a list of IDs">
                    <!-- Single quotes are mapped to __sq__; every other printable character is accepted as-is. -->
                    <sanitizer>
                        <valid initial="string.printable">
                            <remove value="&apos;"/>
                        </valid>
                        <mapping initial="none">
                            <add source="&apos;" target="__sq__"/>
                        </mapping>
                    </sanitizer>
                </param>
            </when>
            <when value="file">
                <param name="genelist" type="data" format="txt,tabular" label="Select your file"/>
                <!-- Defaults to c1, matching the background column parameter below. -->
                <param name="column" type="text" value="c1" label="Column number of IDs" help="For example, fill in 'c1' if it is the first column, 'c2' if it is the second column and so on">
                    <!-- Anchored at both ends so that values with trailing junk (e.g. "c1foo") are rejected. -->
                    <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">^c?[0-9]+$</validator>
                </param>
                <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?"/>
            </when>
        </conditional>

        <!-- Optional user-supplied background list; when disabled no background parameters are exposed. -->
        <conditional name="background_genes">
            <param name="background" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Use your own background (ID list) ?"/>
            <when value="true">
                <conditional name="inputtype">
                    <param name="filetype" type="select" label="Enter your background IDs (Ensembl gene IDs)" help="(e.g : ENSG00000139618)">
                        <option value="file" selected="true">Input file containing your background IDs</option>
                        <option value="copy_paste">Copy/paste your background IDs</option>
                    </param>
                    <when value="copy_paste">
                        <param name="genelist" type="text" label="Copy/paste your background IDs" help="IDs must be separated by spaces into the form field, for example: ENSG00000139618 ENSG00000007350">
                            <!-- Single quotes are mapped to __sq__; every other printable character is accepted as-is. -->
                            <sanitizer>
                                <valid initial="string.printable">
                                    <remove value="&apos;"/>
                                </valid>
                                <mapping initial="none">
                                    <add source="&apos;" target="__sq__"/>
                                </mapping>
                            </sanitizer>
                        </param>
                    </when>
                    <when value="file">
                        <param name="genelist" type="data" format="txt,tabular" label="Select file that contains your background IDs list"/>
                        <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header ?"/>
                        <param name="column" type="text" value="c1" label="Column number of IDs" help="For example, fill in 'c1' if it is the first column, 'c2' if it is the second column and so on">
                            <!-- Anchored at both ends so that values with trailing junk (e.g. "c1foo") are rejected. -->
                            <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">^c?[0-9]+$</validator>
                        </param>
                    </when>
                </conditional>
            </when>
            <when value="false"/>
        </conditional>

        <!-- Organism annotation package handed to the script; only the three species listed in the requirements are enabled. -->
        <param name="geneuniverse" type="select" label="Species (human, mouse, rat)">
            <!--option value="org.At.tair.db" >Arabidopsis</option-->
            <!--option value="org.Ce.eg.db" >Worm (C. elegans)</option-->
            <!--option value="org.Dm.eg.db" >Fly (D. melanogaster)</option-->
            <option value="org.Hs.eg.db" selected="true">Human (Homo sapiens)</option>
            <option value="org.Mm.eg.db">Mouse (Mus musculus)</option>
            <option value="org.Rn.eg.db">Rat (Rattus norvegicus)</option>
            <!--option value="org.Sc.sgd.db" >Yeast (S. cerevisiae)</option-->
        </param>

        <param name="ontocat" type="select" label="GO terms category">
            <option value="BP">Biological Process</option>
            <option value="CC">Cellular Component</option>
            <option value="MF">Molecular Function</option>
        </param>

        <param name="option" type="select" label="Select GO scoring method (read user doc section)">
            <option value="classic">Classic Fisher test</option>
            <option value="elim" selected="true">Elim</option>
            <option value="weight01">Weight01</option>
            <option value="parentchild">ParentChild</option>
        </param>

        <!-- Kept as free text so scientific notation such as 1e-3 can be typed; the validator rejects non-numeric input early. -->
        <param name="threshold" type="text" label="p-value threshold (e.g : 1e-3)" value="1e-3">
            <validator type="regex" message="Please enter a number, for example: 1e-3 or 0.001">^([0-9]+\.?[0-9]*|\.[0-9]+)([eE][-+]?[0-9]+)?$</validator>
        </param>

        <param name="correction" type="select" label="Multiple testing procedure (p-value adjustment)">
            <option value="none">None</option>
            <option value="holm">Holm correction</option>
            <option value="hochberg">Hochberg correction</option>
            <option value="hommel">Hommel correction</option>
            <option value="bonferroni">Bonferroni correction</option>
            <option value="BH" selected="true">Benjamini and Hochberg</option>
            <option value="BY">Benjamini and Yekutieli</option>
            <option value="fdr">FDR</option>
        </param>

        <!--param name="textoutput" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Generate a text file for results" /-->
        <!-- The selected values drive the output filters for the dot-plot and bar-plot datasets. -->
        <param name="plot" type="select" display="checkboxes" multiple="true" label="Graphical display" optional="false">
            <option value="dotplot" selected="true">dot-plot</option>
            <option value="barplot">bar-plot</option>
        </param>
    </inputs>
    <outputs>
      <!-- Result table: collected for every run, since its filter is commented out. -->
      <data name="outputtext" format="tsv" from_work_dir="result.tsv" label="Text output for topGO analysis $ontocat category">
        <!--filter>textoutput</filter-->
      </data>
      <!-- Each plot is only collected when the matching entry is selected in the "plot" parameter. -->
      <data name="outputdotplot" format="png" from_work_dir="dotplot.png" label="Dotplot output for topGO analysis $ontocat category">
        <filter>'dotplot' in plot</filter>
      </data>
      <data name="outputbarplot" format="png" from_work_dir="barplot.png" label="Barplot output for topGO analysis $ontocat category">
        <filter>'barplot' in plot</filter>
      </data>
    </outputs>
   <tests>
     <!--
       File input (IDs in column 8, with header), Biological Process ontology, elim scoring,
       BH correction, both plots requested; only the text result is compared.
       The parameter names and values must match the declared inputs exactly: the select
       "filetype" only offers "file" and "copy_paste", and the boolean "header" uses "true".
     -->
     <test>
       <conditional name="inputtype">
         <param name="filetype" value="file"/>
         <param name="genelist" value="ID_Converter_FKW_Lacombe_et_al_2017_OK.txt"/>
         <param name="column" value="c8"/>
         <param name="header" value="true"/>
       </conditional>
       <param name="ontocat" value="BP"/>
       <param name="option" value="elim"/>
       <param name="threshold" value="1e-3"/>
       <param name="correction" value="BH"/>
       <param name="plot" value="dotplot,barplot"/>
       <param name="geneuniverse" value="org.Hs.eg.db"/>
       <output name="outputtext" file="result.tsv"/>
     </test>
   </tests>
   <help><![CDATA[


**Description**

This tool is based on R package topGO. topGO package provides tools for testing GO terms while accounting for the topology of the GO graph. Different test statistics and different methods for eliminating local similarities and dependencies between GO terms can be applied.

This component computes the GO terms representativity of a gene list in one ontology category (Biological Process "BP", Cellular Component "CC", Molecular Function "MF"). This representativity is evaluated in comparison to the background list of all genes/proteins (of the selected species) associated with GO terms of the chosen category (BP,CC,MF).

-----

**Input required**

This component works with Ensembl gene IDs (e.g : ENSG00000139618). You can copy/paste these identifiers or supply a tabular file (.csv, .tsv, .txt, .tab)
and then specify the column number that contains the ENSG IDs.

-----

**Parameters**

"Species": the three available species are Homo sapiens, Mus musculus and Rattus norvegicus

"GO terms category": select either Biological Process (BP) (by default), Cellular Component (CC) or Molecular Function (MF)

"Select GO scoring method": topGO provides the commonly used Fisher test for evaluating which GO terms are over-represented in your gene/protein list; it also provides other GO scoring algorithms (i.e. Elim, Weight01, Parentchild). For the merits of each option and their algorithmic description, please refer to topGO manual:
https://bioconductor.org/packages/release/bioc/vignettes/topGO/inst/doc/topGO.pdf

"p-value threshold (e.g : 1e-3)": must be in the form of "1e-5" (i.e. 0.00001)

"Multiple testing procedure (p-value adjustment)": several procedures for multiple testing and p-value adjustment are available: Holm, Hochberg,
Hommel, Bonferroni, BH (Benjamini-Hochberg), BY (Benjamini-Yekutieli), FDR. Default is BH (most commonly used)

-----

**Output**

Two types of output are available: textual and/or graphical outputs (barplot and/or a dotplot (set by default)).

*Textual output*

The text output lists all the GO-terms that were found significantly enriched according to the specified threshold (p-value).

The different fields are as follows:

- Annotated : number of genes in the selected species that are annotated with the GO-term.

- Significant : number of genes belonging to your input annotated with the GO-term.

- Expected : represents the expected number of interesting genes mapped to the GO term if the interesting genes were randomly distributed over all GO terms.

- p-values : p-value obtained after the test

- ( q-values  : additional column with adjusted pvalues )

-----

.. class:: infomark

Packages used:
    - bioconductor-org.hs.eg.db v3.8.2
    - bioconductor-org.mm.eg.db v3.8.2
    - bioconductor-org.rn.eg.db v3.8.2
    - bioconductor-annotationdbi v1.46.0
    - bioconductor-go.db v3.8.2
    - bioconductor-graph v1.62.0
    - bioconductor-topgo v2.36.0

-----

.. class:: infomark

**Authors**

Alexa A, Rahnenführer J, Lengauer T. Improved scoring of functional groups from gene expression data by decorrelating GO graph structure. Bioinformatics. 2006. 22(13):1600-7. PubMed PMID: 16606683.

-----

.. class:: infomark

**Galaxy integration**

Lisa Perus, Lien Nguyen, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR

Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux - INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR

This work has been partially funded through the French National Agency for Research (ANR) IFB project.

Help: contact@proteore.org for any questions or concerns about this tool.

]]></help>
   <citations>
     <!-- topGO method paper, taken from the reference given in the help section. -->
     <citation type="bibtex">
@article{alexa2006topgo,
  author = {Alexa, A. and Rahnenführer, J. and Lengauer, T.},
  title = {Improved scoring of functional groups from gene expression data by decorrelating GO graph structure},
  journal = {Bioinformatics},
  year = {2006},
  volume = {22},
  number = {13},
  pages = {1600--1607},
  note = {PubMed PMID: 16606683}
}
     </citation>
   </citations>

</tool>