view SPIA.xml @ 2:1928527cb55d draft

Uploaded
author mora-lab
date Thu, 20 May 2021 12:18:48 +0000
parents 401181d40d7a
children
line wrap: on
line source

<tool id="SPIA" name="SPIA (Signaling Pathway Impact Analysis)" version="0.1.0" >
    <!-- One-sentence summary of what the tool does. -->
    <description>A method based on over-representation and signaling perturbation accumulation to analyze KEGG signaling pathways.</description>

    <!-- Runtime dependencies, pinned to exact versions. Package names are
         written in lowercase, the form in which they are published in the
         conda channels, so that dependency resolution can find them. -->
    <requirements>
        <requirement type="package" version="1.20.3">r-getopt</requirement>
        <requirement type="package" version="2.42.0">bioconductor-spia</requirement>
    </requirements>

    <command detect_errors="exit_code"><![CDATA[
        ## Run the SPIA.R script located in the tool directory. Every value is
        ## single-quoted so that paths and user-supplied text reach the script
        ## as single arguments.
        Rscript '$__tool_directory__/SPIA.R'
            -D '$input_data'
            -O '$organism'
            -R '$sigP_output'
            -P '$adv.P_value_threshold'
            -N '$adv.Number_bootstrap'
            -C '$adv.method_combine_pvalue'
        ## Pass the plot flag and the PDF output path only when plotting is enabled.
        #if str($adv.plot_perturbation) == "True":
            -W
            -L '$SPIA_Perturbation_Plots'
        #end if
        ## Pass pathway IDs only when some were actually entered. A blank or
        ## whitespace-only field is treated as "no restriction", so -I is omitted.
        #if str($adv.pathwayId).strip() != "":
            -I '$adv.pathwayId'
        #end if
    ]]></command>

    <inputs>
        <!-- Required inputs: the gene-level csv table and the organism code. -->
        <param name="input_data"
               type="data"
               format="csv"
               multiple="false"
               label="Input data"
               help="A csv file including the columns ENTREZ, logFC, and adj.P.Val"/>
        <param name="organism"
               type="text"
               value="hsa"
               label="Organism"
               help="A three letter character designating the organism. Default is `hsa` (human). See a full list at https://www.genome.jp/kegg/catalog/org_list.html"/>

        <!-- Optional tuning parameters; the section is collapsed by default. -->
        <section name="adv" title="Advanced Options" expanded="false">
            <!-- Cut-off, between 0 and 1, used to select differentially expressed genes. -->
            <param name="P_value_threshold"
                   type="float"
                   value="0.05"
                   min="0.00"
                   max="1.00"
                   label="P value threshold to select DEgenes"
                   help="Set a threshold value to define differentially expressed genes"/>
            <!-- Bootstrap iteration count; values below 100 are rejected. -->
            <param name="Number_bootstrap"
                   type="integer"
                   value="2000"
                   min="100"
                   label="Bootstrap iterations"
                   help="Number of bootstrap iterations used to compute the P PERT value. Should be larger than 100. A recommended value is 2000."/>
            <!-- How the two p-values are combined; 'fisher' is preselected. -->
            <param name="method_combine_pvalue"
                   type="select"
                   label="Method to combine P values"
                   help="Method used to combine the two types of p-values. If set to 'fisher' it will use Fisher's method. If set to 'norminv' it will use the normal inversion method.">
                <option selected="True" value="fisher">fisher</option>
                <option value="norminv">norminv</option>
            </param>
            <!-- Switch for the optional PDF of perturbation plots; off by default. -->
            <param name="plot_perturbation"
                   type="boolean"
                   checked="False"
                   truevalue="True"
                   falsevalue="False"
                   label="Plot perturbation"
                   help="If set to Yes, plot the gene perturbation accumulation vs log2 fold change for every gene on each pathway. Default is No."/>
            <!-- Optional list of pathway IDs; empty by default. -->
            <param name="pathwayId"
                   type="text"
                   value=""
                   label="Pathway IDs -- default as NULL and analysis all pathway. "
                   help="Special one or more pathway to analysis, input pathway ID at here. For example: 03018, 03320."/>
        </section>
    </inputs>

    <outputs>
        <!-- Result table in csv format; declared without a filter. -->
        <data name="sigP_output" label="SPIA_enrich_kegg" format="csv"/>
        <!-- PDF of perturbation plots; only included when the
             plot_perturbation option of the adv section is switched on. -->
        <data name="SPIA_Perturbation_Plots" label="SPIA_Perturbation_Plots" format="pdf">
            <filter>adv['plot_perturbation'] == True</filter>
        </data>
    </outputs>

    <tests>
        <!-- Single test: feeds SPIA_input.csv to the tool, leaves every other
             parameter at its default, and compares the result table with x.csv.
             NOTE(review): confirm that x.csv exists in the test data and that
             the bootstrap-based output is reproducible enough for a file
             comparison. -->
        <test>
            <param name="input_data" ftype="csv" value="SPIA_input.csv"/>
            <output name="sigP_output" ftype="csv" file="x.csv"/>
        </test>
    </tests>

    <help><![CDATA[
        
    .. class:: infomark
    
    **What it does**
    
    SPIA (Signaling pathway impact analysis) combines the evidence obtained from the 
    classical enrichment analysis with a novel type of evidence, which measures the actual
    perturbation on a given pathway under a given condition.
    
    A bootstrap procedure is used to assess the significance of the observed total pathway perturbation.
    
    Then we can calculate a global pathway significance P-value, which combines the enrichment and perturbation P-values.
    
    SPIA tool analyzes KEGG signaling pathways.

-------

=========
**Input**
=========

Basic options
--------------

**Input data** 

The input data is a csv file, which includes the columns `ENTREZ`, `logFC` and `adj.P.Val`.
This file contains all genes of your dataset.

    ====== ========== ======= ==========  ========= ==== ========
     logFC  AveExpr     t       P.Value   adj.P.Val B     ENTREZ
    ====== ========== ======= ==========  ========= ==== ========
    5.96    6.23        23.9    1.79e-17  9.78e-13  25.4   3491
    5.14    7.49        17.4    1.56e-14  2.84e-10  21.0   2353
    4.15    7.04        16.5    5.15e-14  7.04e-10  20.1   1958
    2.43    9.59        14.1    1.29e-12  1.41e-08  17.7   1843
    1.53    8.22        11.0    1.69e-10  1.15e-06  13.6   3725
    1.43    5.33        10.5    4.27e-10  2.42e-06  12.8  23645
    ====== ========== ======= ==========  ========= ==== ========

**Organism**

A three letter word designating the organism of your data. Default is `hsa` (Human). See a full list of options at https://www.genome.jp/kegg/catalog/org_list.html.

------

Advanced Options
-----------------

**P value threshold to select DEgenes**

Set a threshold value to define differentially expressed genes. Default is 0.05.

**Bootstrap iterations**

Number of bootstrap iterations used to compute the `pPERT` value. Should be larger than 100. A recommended value is 2000.

**Method to combine P values**

Method used to combine the two types of p-values. If set to 'fisher' it will use Fisher's method. If set to 'norminv' it will use the normal inversion method.

**Plot perturbation**

If set to `Yes`, plots the gene perturbation accumulation vs log2 fold change for every gene on each pathway. Default is `No`.

**Pathway IDs -- default as NULL and analysis all pathway.**

To restrict the analysis to one or more specific pathways, enter their pathway IDs here, for example: `03018, 03320`. Leave the field empty to analyze all pathways.

------

==========
**Output**
==========

    **CSV file**
    
    This file contains the ranked pathways and various statistics:
        - **Name** is the pathway name;
        - **ID** is the pathway ID;
        - **pSize** is the number of genes on the pathway; 
        - **NDE** is the number of DE genes per pathway; 
        - **tA** is the observed total perturbation accumulation in the pathway; 
        - **pNDE** is the probability to observe at least NDE genes on the pathway using a hypergeometric model; 
        - **pPERT** is the probability to observe a total accumulation more extreme than tA only by chance; 
        - **pG** is the p-value obtained by combining pNDE and pPERT; 
        - **pGFdr** and **pGFWER** are the False Discovery Rate and Bonferroni adjusted global p-values; 
        - **Status** gives the direction in which the pathway is perturbed (activated or inhibited). 
        - **KEGGLINK** gives a web link to the KEGG website that displays the pathway image with the differentially expressed genes highlighted in red.
        
    **PDF file**
    
    If the plot argument is set to `Yes`, it will output the plots for the gene perturbation accumulation vs log2 fold change for every gene on each pathway.
    
------

Please cite SPIA_ appropriately if you use it.
             
.. _SPIA: https://pubmed.ncbi.nlm.nih.gov/18990722/   

    ]]></help>
    
    <citations>
        <!-- Publication to cite for this tool, referenced by DOI. -->
        <citation type="doi">10.1093/bioinformatics/btn577</citation>
    </citations>

</tool>