changeset 1:401181d40d7a draft

Uploaded
author mora-lab
date Thu, 20 May 2021 08:52:23 +0000
parents 42c80b0324fc
children 1928527cb55d
files SPIA.xml
diffstat 1 files changed, 169 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/SPIA.xml	Thu May 20 08:52:23 2021 +0000
@@ -0,0 +1,169 @@
+<tool id="SPIA" name="SPIA (Signaling Pathway Impact Analysis)" version="0.1.0" >
+    <!-- Short summary of the tool; Galaxy displays it alongside the tool name. -->
+    <description>A method based on over-representation and signaling perturbation accumulation to analyze KEGG signaling pathways.</description>
+
+    <!-- Packages Galaxy resolves for the job environment of this tool.
+         Bioconda recipe names are all lowercase, so the SPIA package is
+         requested as bioconductor-spia instead of relying on case-insensitive
+         matching in the dependency resolver. -->
+    <requirements>
+        <requirement type="package" version="1.20.3">r-getopt</requirement>
+        <requirement type="package" version="2.42.0">bioconductor-spia</requirement>
+        <requirement type="package" version="2.14.0">bioconductor-chipenrich.data</requirement>
+    </requirements>
+
+    <command detect_errors="exit_code"><![CDATA[
+        ## Arguments that are always passed to the R script.
+        Rscript '$__tool_directory__/SPIA.R'
+            -D '$input_data'
+            -O '$organism'
+            -R '$sigP_output'
+            -P '$adv.P_value_threshold'
+            -N '$adv.Number_bootstrap'
+            -C '$adv.method_combine_pvalue'
+        ## The plot flag and the PDF path are only added when plotting is enabled.
+        #if str($adv.plot_perturbation) == "True"
+            -W
+            -L '$SPIA_Perturbation_Plots'
+        #end if
+        ## The pathway list is only passed when the text field is not empty.
+        #if str($adv.pathwayId) != ""
+            -I '$adv.pathwayId'
+        #end if
+    ]]></command>
+
+    <inputs>
+        <!-- Basic options: the input table and the organism code. -->
+        <param type="data" name="input_data" format="csv" multiple="false" label="Input data" help="A csv file including the columns ENTREZ, logFC, and adj.P.Val"/>
+        <param type="text" name="organism" value="hsa" label="Organism" help="A three-letter code designating the organism. Default is `hsa` (human). See a full list at https://www.genome.jp/kegg/catalog/org_list.html" />
+
+        <!-- Advanced options are collapsed by default; every parameter has a default value. -->
+        <section name="adv" title="Advanced Options" expanded="false">
+            <param type="float" name="P_value_threshold" label="P value threshold to select DEgenes" value="0.05" min="0.00" max="1.00" help="Set a threshold value to define differentially expressed genes"/>
+            <!-- The help text matches the enforced minimum (min="100" accepts exactly 100). -->
+            <param type="integer" name="Number_bootstrap" value="2000" min="100" label="Bootstrap iterations" help="Number of bootstrap iterations used to compute the P PERT value. Must be at least 100. A recommended value is 2000." />
+            <param type="select" name="method_combine_pvalue" label="Method to combine P values" help="Method used to combine the two types of p-values. If set to 'fisher' it will use Fisher's method. If set to 'norminv' it will use the normal inversion method.">
+                <option value="fisher" selected="true">fisher</option>
+                <option value="norminv">norminv</option>
+            </param>
+            <!-- truevalue/falsevalue stay "True"/"False": the command template compares against "True". -->
+            <param type="boolean" name="plot_perturbation" truevalue="True" falsevalue="False" checked="false" label="Plot perturbation" help="If set to Yes, plot the gene perturbation accumulation vs log2 fold change for every gene on each pathway. Default is No." />
+            <!-- An empty value means no pathway restriction; the command then omits the -I option. -->
+            <param type="text" name="pathwayId" value="" label="Pathway IDs" help="Optional. To analyze only specific pathways, enter one or more pathway IDs separated by commas, for example: 03018, 03320. Leave empty (default) to analyze all pathways."/>
+        </section>
+
+    </inputs>
+
+    <outputs>
+        <!-- Result table written to the path passed with -R; no filter, so it is always created. -->
+        <data name="sigP_output" format="csv" label="SPIA_enrich_kegg" />
+        <!-- PDF passed with -L; only created when the "Plot perturbation" option is enabled. -->
+        <data name="SPIA_Perturbation_Plots" format="pdf" label="SPIA_Perturbation_Plots">
+            <filter>adv['plot_perturbation'] == True</filter>
+        </data>
+    </outputs>
+
+    <tests>
+        <!-- Single test: run with default options on SPIA_input.csv and compare the result table with x.csv. -->
+        <test>
+            <param name="input_data" value="SPIA_input.csv" ftype="csv"/>
+            <output name="sigP_output" file="x.csv" ftype="csv"/>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+        
+    .. class:: infomark
+    
+    **What it does**
+    
+    SPIA (Signaling pathway impact analysis) combines the evidence obtained from the 
+    classical enrichment analysis with a novel type of evidence, which measures the actual
+    perturbation on a given pathway under a given condition.
+    
+    A bootstrap procedure is used to assess the significance of the observed total pathway perturbation.
+    
+    Then we can calculate a global pathway significance P-value, which combines the enrichment and perturbation P-values.
+    
+    The SPIA tool analyzes KEGG signaling pathways.
+
+-------
+
+=========
+**Input**
+=========
+
+Basic options
+--------------
+
+**Input data** 
+
+The input data is a csv file, which includes the columns `ENTREZ`, `logFC` and `adj.P.Val`.
+This file contains all genes of your dataset.
+
+    ====== ========== ======= ==========  ========= ==== ========
+     logFC  AveExpr     t       P.Value   adj.P.Val B     ENTREZ
+    ====== ========== ======= ==========  ========= ==== ========
+    5.96    6.23        23.9    1.79e-17  9.78e-13  25.4   3491
+    5.14    7.49        17.4    1.56e-14  2.84e-10  21.0   2353
+    4.15    7.04        16.5    5.15e-14  7.04e-10  20.1   1958
+    2.43    9.59        14.1    1.29e-12  1.41e-08  17.7   1843
+    1.53    8.22        11.0    1.69e-10  1.15e-06  13.6   3725
+    1.43    5.33        10.5    4.27e-10  2.42e-06  12.8  23645
+    ====== ========== ======= ==========  ========= ==== ========
+
+**Organism**
+
+A three-letter KEGG code designating the organism of your data. Default is `hsa` (Human). See a full list of options at https://www.genome.jp/kegg/catalog/org_list.html.
+
+------
+
+Advanced Options
+-----------------
+
+**P value threshold to select DEgenes**
+
+Set a threshold value to define differentially expressed genes. Default is 0.05.
+
+**Bootstrap iterations**
+
+Number of bootstrap iterations used to compute the `pPERT` value. Should be larger than 100. A recommended value is 2000.
+
+**Method to combine P values**
+
+Method used to combine the two types of p-values. If set to 'fisher' it will use Fisher's method. If set to 'norminv' it will use the normal inversion method.
+
+**Plot perturbation**
+
+If set to `Yes`, plots the gene perturbation accumulation vs log2 fold change for every gene on each pathway. Default is `No`.
+
+**Pathway IDs**
+
+Optional. To restrict the analysis to one or more specific pathways, enter their pathway IDs here, separated by commas, for example: `03018, 03320`. If left empty (the default, passed as NULL), all pathways are analyzed.
+
+------
+
+==========
+**Output**
+==========
+
+    **CSV file**
+    
+    This file contains the ranked pathways and various statistics:
+        - **Name** is the pathway name;
+        - **ID** is the pathway ID;
+        - **pSize** is the number of genes on the pathway; 
+        - **NDE** is the number of DE genes per pathway; 
+        - **tA** is the observed total perturbation accumulation in the pathway; 
+        - **pNDE** is the probability to observe at least NDE genes on the pathway using a hypergeometric model; 
+        - **pPERT** is the probability to observe a total accumulation more extreme than tA only by chance; 
+        - **pG** is the p-value obtained by combining pNDE and pPERT; 
+        - **pGFdr** and **pGFWER** are the False Discovery Rate and Bonferroni adjusted global p-values; 
+        - **Status** gives the direction in which the pathway is perturbed (activated or inhibited). 
+        - **KEGGLINK** gives a web link to the KEGG website that displays the pathway image with the differentially expressed genes highlighted in red.
+        
+    **PDF file**
+    
+    If the plot argument is set to `Yes`, it will output the plots for the gene perturbation accumulation vs log2 fold change for every gene on each pathway.
+    
+------
+
+Please cite SPIA_ appropriately if you use it.
+             
+.. _SPIA: https://pubmed.ncbi.nlm.nih.gov/18990722/   
+
+    ]]></help>
+    
+    <citations>
+        <!-- DOI of the publication to cite when using this tool. -->
+        <citation type="doi">10.1093/bioinformatics/btn577</citation>
+    </citations>
+
+</tool>