diff mogsa.xml @ 0:d3eba0ce0908 draft

Uploaded
author mora-lab
date Thu, 20 May 2021 08:48:27 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mogsa.xml	Thu May 20 08:48:27 2021 +0000
@@ -0,0 +1,132 @@
+<tool id="mogsa" name="mogsa" version="0.1.0">
+    <description>Integrative single sample gene-set analysis of multiple omics data</description>
+
+    <requirements>
+        <requirement type="package" version="1.20.3">r-getopt</requirement>
+        <requirement type="package" version="1.24.0">bioconductor-mogsa</requirement>
+        <requirement type="package" version="4.2.3">r-openxlsx</requirement>
+    </requirements>
+
+    <command detect_errors="exit_code"><![CDATA[
+        Rscript '$__tool_directory__/mogsa.R' 
+        --data_file  '$data_file' 
+        --geneSet_file '$geneSet' 
+        --design_file '$desigin' 
+        --PC_number '$pc_number' 
+        --w_data '$adv.w_data'  
+        --proc_row '$adv.proc_row' 
+        --ks_B '$adv.ks_B' 
+        --p_adjust_method '$adv.p_adjust_method' 
+        --output_file1 '$mogsa_result_geneSetScoreMatrix' 
+        --output_file2 '$mogsa_result_pvalueMatrix'
+
+    ]]></command>
+
+    <inputs>
+        <param name="data_file" type="data" format="xlsx" label="Omics data" help="An Excel file including multiple omics datasets. Each sheet has one matrix for omics data. See help section for more details." />
+        <param name="desigin" type="data" format="CSV" label="Design" help="A csv file with three columns for sample, label and color. See help section for more details."/>
+        <param name="geneSet" type="data" format="rdata" label="Gene Set" help="An rdata file including a 'geneSet' variable that is a geneSetCollection object. See help section for more details."/>
+        <param name="pc_number" type="integer" value="3" min="2" label="PC number" help="Number of principal components to be used." />
+        
+        <section name="adv" title="Advance Options">
+            <param name="w_data" type="select" label="The weights of each separate dataset" >
+                <option value="uniform">no weighting</option>
+                <option value="lambda1">weighted by the reverse of the first eigenvalue of each individual dataset</option>
+                <option value="inertia" selected="true">weighted by the reverse of the total inertia</option>
+            </param>
+            <param name="proc_row" type="select" label="Preprocessing of rows of datasets">
+                <option value="none">no preprocessing</option>
+                <option value="center">center only</option>
+                <option value="center_ssq1" selected="true">center and scale (sum of squared values equals 1)</option>
+                <option value="center_ssqN">center and scale (sum of squared values equals the number of columns)</option>
+                <option value="center_ssqNm1">center and scale (sum of squared values equals the number of columns - 1) </option>
+            </param>
+            <param name="ks_B" type="integer" value="1000" min="100" label="The number of bootstrapping samples"  help="An integer to indicate the number of bootstrapping samples to calculate the p-value of KS statistic." />
+            <param name="p_adjust_method" type="select" label="P-value adjustment method">
+                <option value="holm">holm</option>
+                <option value="hochberg">hochberg</option>
+                <option value="hommel">hommel</option>
+                <option value="bonferroni">bonferroni</option>
+                <option value="BH" selected="true" >BH</option>
+                <option value="BY">BY</option>
+                <option value="fdr">fdr</option>
+                <option value="none">none</option>
+            </param>
+        </section>
+        
+    </inputs>
+
+    <outputs>
+		<data name="mogsa_result_geneSetScoreMatrix" format="csv" label="mogsa_result_geneSetScoreMatrix" />
+        <data name="mogsa_result_pvalueMatrix" format="csv" label="mogsa_result_pvalueMatrix" />
+    </outputs>
+
+    <help><![CDATA[
+
+.. class:: infomark 
+
+**What it does**
+
+mogsa (Multiple Omics data integration and Gene Set Analysis) is an integrative multi-omics single-sample gene set analysis method. 
+The method learns a low dimensional representation of most variant correlated features (genes, proteins, etc.) across multiple omics data sets,  transforms the features onto the same scale and calculates an integrated gene-set score from the most informative features in each data type. mogsa does not require filtering data to the intersection of features (gene IDs); therefore, all molecular features, including those that lack  annotation may be included in the analysis.
+
+---------
+
+==========
+**Inputs**
+==========
+
+Basic Options
+--------------
+
+**Omics data**
+
+**Omics data** refers to an excel file with multiple sheets, each of them including a different matrix of omics data. Sheet names are the source of omics data. The matrix must have the same rownames and colnames, where rows are genes and columns are samples.
+
+**Design**
+
+**Design** refers to a csv file, which has three columns called `sample` and `label`. The `sample` column corresponds to omics data, and `label` is for different type of `sample`.
+
+**Gene Set**
+
+**Gene Sets** is an `rdata` file including a 'geneSet' variable (which is a geneSetCollection object built by the `GSEABase` package). You can use the **GeneSet from Msigdb/KEGG** tool to get this file. You must pay attention to set the same gene id type as in the omics dataset (the gene IDs should be the same as in the rownames of omics data).
+
+**PC number**
+
+In practice, one needs to determine how many PCs should be retained in the step of reconstructing the gene set score matrix. In our results, we plot a scree plot of the eigenvalues, which result from the multivariate analysis. We can take the better number of PC as input. Default as 3.
+
+Advanced Options
+-----------------
+
+**The weights of each separate dataset**
+The weights of each separate dataset should be one of uniform - no weighting, lambda1 - weighted by the reverse of the first eigenvalue of each individual dataset, or inertia - weighted by the reverse of the total inertia. 
+
+MFA (multiple factorial analysis) corresponds to choosing `lambda1`.
+
+**Preprocessing of rows of datasets**
+Preprocessing of rows of datasets, should be one of none - no preprocessing, center - center only, center_ssq1 - center and scale (sum of squared values equals 1), center_ssqN - center and scale (sum of squared values equals the number of columns), center_ssqNm1 - center and scale (sum of squared values equals the number of columns - 1).
+
+MFA (multiple factorial analysis) corresponds to choosing `center_ssq1`.
+
+**The number of bootstrapping samples**
+An integer to indicate the number of bootstrapping samples to calculate the p-value of the KS statistic. Default is 1000.
+
+**P-value adjustment method**
+Choose one method to adjust p-value among: `"BH"`, `"holm"`, `"hochberg"`, `"hommel"`, `"bonferroni"`, `"BY"`, `"fdr"` and `"none"`. Default is `"BH"`.
+
+------
+
+==========
+**Output**
+==========
+
+1. gene set score matrix, 
+2. P-value matrix
+        
+    ]]></help>
+
+    <citations>
+        <citation type="doi">10.1074/mcp.TIR118.001251</citation>
+    </citations>
+
+</tool>