Mercurial > repos > mora-lab > spia
changeset 1:401181d40d7a draft
Uploaded
author | mora-lab |
---|---|
date | Thu, 20 May 2021 08:52:23 +0000 |
parents | 42c80b0324fc |
children | 1928527cb55d |
files | SPIA.xml |
diffstat | 1 files changed, 169 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/SPIA.xml Thu May 20 08:52:23 2021 +0000 @@ -0,0 +1,169 @@ +<tool id="SPIA" name="SPIA (Signaling Pathway Impact Analysis)" version="0.1.0" > + <description>A method based on over-representation and signaling perturbation accumulation to analyze KEGG signaling pathways.</description> + + <requirements> + <requirement type="package" version="1.20.3">r-getopt</requirement> + <requirement type="package" version="2.42.0">bioconductor-SPIA</requirement> + <requirement type="package" version="2.14.0">bioconductor-chipenrich.data</requirement> + </requirements> + + <command detect_errors="exit_code"><![CDATA[ + Rscript '$__tool_directory__/SPIA.R' + -D '$input_data' + -O '$organism' + -R '$sigP_output' + + -P '$adv.P_value_threshold' + + -N '$adv.Number_bootstrap' + -C '$adv.method_combine_pvalue' + #if $adv.plot_perturbation=="True": + -W + -L '$SPIA_Perturbation_Plots' + #end if + + #if $adv.pathwayId !="": + -I '$adv.pathwayId' + #end if + + ]]></command> + + <inputs> + <param type="data" name="input_data" format="csv" multiple="false" label="Input data" help="A csv file including the columns ENTREZ, logFC, and adj.P.Val"/> + <param type="text" name="organism" value="hsa" label="Organism" help="A three letter character designating the organism. Default is `hsa` (human). See a full list at https://www.genome.jp/kegg/catalog/org_list.html" /> + + <section name="adv" title="Advanced Options" expanded="false"> + <param type="float" name="P_value_threshold" label="P value threshold to select DEgenes" value="0.05" min="0.00" max="1.00" help="Set a threshold value to define differentially expressed genes"/> + <param type="integer" name="Number_bootstrap" value="2000" min="100" label="Bootstrap iterations" help="Number of bootstrap iterations used to compute the P PERT value. Should be larger than 100. A recommended value is 2000." /> + <param type="select" name="method_combine_pvalue" label="Method to combine P values" help="Method used to combine the two types of p-values. If set to 'fisher' it will use Fisher's method. If set to 'norminv' it will use the normal inversion method."> + <option value="fisher" selected="True">fisher</option> + <option value="norminv">norminv</option> + </param> + <param type="boolean" name="plot_perturbation" truevalue="True" falsevalue="False" checked="False" label="Plot perturbation" help="If set to Yes, plot the gene perturbation accumulation vs log2 fold change for every gene on each pathway. Default is No." /> + <param type="text" name="pathwayId" value="" label="Pathway IDs -- default as NULL and analysis all pathway. " help="Special one or more pathway to analysis, input pathway ID at here. For example: 03018, 03320."/> + </section> + + </inputs> + + <outputs> + <data name="sigP_output" format="csv" label="SPIA_enrich_kegg" /> + <data format="pdf" name="SPIA_Perturbation_Plots" label="SPIA_Perturbation_Plots"> + <filter>adv['plot_perturbation'] == True</filter> + </data> + </outputs> + + <tests> + <test> + <param name="input_data" value="SPIA_input.csv" ftype="csv" /> + <output name="sigP_output" file="x.csv" ftype="csv" /> + </test> + </tests> + + <help><![CDATA[ + + .. class:: infomark + + **What it does** + + SPIA (Signaling pathway impact analysis) combines the evidence obtained from the + classical enrichment analysis with a novel type of evidence, which measures the actual + perturbation on a given pathway under a given condition. + + A bootstap procedure is used to assess the significance of the observed total pathway perturbation. + + Then we can calculate a global pathway significance P-value, which combines the enrichment and perturbation P-values. + + SPIA tool analyzes KEGG signaling pathways. + +------- + +========= +**Input** +========= + +Basic options +-------------- + +**Input data** + +The input data is a csv file, which includes the columns `ENTREZ`, `logFC` and `adj.P.Val`. +This file contains all genes of your dataset. + + ====== ========== ======= ========== ========= ==== ======== + logFC AveExpr t P.Value adj.P.Val B ENTREZ + ====== ========== ======= ========== ========= ==== ======== + 5.96 6.23 23.9 1.79e-17 9.78e-13 25.4 3491 + 5.14 7.49 17.4 1.56e-14 2.84e-10 21.0 2353 + 4.15 7.04 16.5 5.15e-14 7.04e-10 20.1 1958 + 2.43 9.59 14.1 1.29e-12 1.41e- 8 17.7 1843 + 1.53 8.22 11.0 1.69e-10 1.15e- 6 13.6 3725 + 1.43 5.33 10.5 4.27e-10 2.42e- 6 12.8 23645 + ====== ========== ======= ========== ========= ==== ======== + +**Organism** + +A three letter word designating the organism of your data. Default is `hsa` (Human). See a full list of options at https://www.genome.jp/kegg/catalog/org_list.html. + +------ + +Advanced Options +----------------- + +**P value threshold to select DEgenes** + +Set a threshold value to define differentially expressed genes. Default is 0.05. + +**Bootstrap iterations** + +Number of bootstrap iterations used to compute the `pPERT` value. Should be larger than 100. A recommended value is 2000. + +**Method to combine P values** + +Method used to combine the two types of p-values. If set to 'fisher' it will use Fisher's method. If set to 'norminv' it will use the normal inversion method. + +**Plot perturbation** + +If set to `Yes`, plots the gene perturbation accumulation vs log2 fold change for every gene on each pathway. Default is `No`. + +**Pathway IDs -- default as NULL and analysis all pathway.** + +if you want special one or more pathway to analysis, Input pathway id at here. for example: `03018, 03320`. + +------ + +========== +**Output** +========== + + **CSV file** + + This file contains the ranked pathways and various statistics: + - **Name** is the pathway name; + - **ID** is the pathway ID; + - **pSize** is the number of genes on the pathway; + - **NDE** is the number of DE genes per pathway; + - **tA** is the observed total perturbation accumulation in the pathway; + - **pNDE** is the probability to observe at least NDE genes on the pathway using a hypergeometric model; + - **pPERT** is the probability to observe a total accumulation more extreme than tA only by chance; + - **pG** is the p-value obtained by combining pNDE and pPERT; + - **pGFdr** and **pGFWER** are the False Discovery Rate and Bonferroni adjusted global p-values; + - **Status** gives the direction in which the pathway is perturbed (activated or inhibited). + - **KEGGLINK** gives a web link to the KEGG website that displays the pathway image with the differentially expressed genes highlighted in red. + + **PDF file** + + If the plot argument is set to `Yes`, it will output the plots for the gene perturbation accumulation vs log2 fold change for every gene on each pathway. + +------ + +Please cite SPIA_ appropriately if you use them. + +.. _SPIA: https://pubmed.ncbi.nlm.nih.gov/18990722/ + + ]]></help> + + <citations> + <citation type="doi">10.1093/bioinformatics/btn577</citation> + </citations> + +</tool>