annotate mogsa.xml @ 0:d3eba0ce0908 draft

Uploaded
author mora-lab
date Thu, 20 May 2021 08:48:27 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
1 <tool id="mogsa" name="mogsa" version="0.1.0">
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
2 <description>Integrative single sample gene-set analysis of multiple omics data</description>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
3
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
4 <requirements>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
5 <requirement type="package" version="1.20.3">r-getopt</requirement>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
6 <requirement type="package" version="1.24.0">bioconductor-mogsa</requirement>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
7 <requirement type="package" version="4.2.3">r-openxlsx</requirement>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
8 </requirements>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
9
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
10 <command detect_errors="exit_code"><![CDATA[
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
11 Rscript '$__tool_directory__/mogsa.R'
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
12 --data_file '$data_file'
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
13 --geneSet_file '$geneSet'
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
14 --design_file '$desigin'
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
15 --PC_number '$pc_number'
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
16 --w_data '$adv.w_data'
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
17 --proc_row '$adv.proc_row'
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
18 --ks_B '$adv.ks_B'
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
19 --p_adjust_method '$adv.p_adjust_method'
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
20 --output_file1 '$mogsa_result_geneSetScoreMatrix'
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
21 --output_file2 '$mogsa_result_pvalueMatrix'
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
22
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
23 ]]></command>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
24
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
25 <inputs>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
26 <param name="data_file" type="data" format="xlsx" label="Omics data" help="An Excel file including multiple omics datasets. Each sheet has one matrix for omics data. See help section for more details." />
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
27 <param name="desigin" type="data" format="CSV" label="Design" help="A csv file with three columns for sample, label and color. See help section for more details."/>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
28 <param name="geneSet" type="data" format="rdata" label="Gene Set" help="An rdata file including a 'geneSet' variable that is a geneSetCollection object. See help section for more details."/>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
29 <param name="pc_number" type="integer" value="3" min="2" label="PC number" help="Number of principal components to be used." />
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
30
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
31 <section name="adv" title="Advance Options">
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
32 <param name="w_data" type="select" label="The weights of each separate dataset" >
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
33 <option value="uniform">no weighting</option>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
34 <option value="lambda1">weighted by the reverse of the first eigenvalue of each individual dataset</option>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
35 <option value="inertia" selected="true">weighted by the reverse of the total inertia</option>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
36 </param>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
37 <param name="proc_row" type="select" label="Preprocessing of rows of datasets">
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
38 <option value="none">no preprocessing</option>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
39 <option value="center">center only</option>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
40 <option value="center_ssq1" selected="true">center and scale (sum of squared values equals 1)</option>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
41 <option value="center_ssqN">center and scale (sum of squared values equals the number of columns)</option>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
42 <option value="center_ssqNm1">center and scale (sum of squared values equals the number of columns - 1) </option>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
43 </param>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
44 <param name="ks_B" type="integer" value="1000" min="100" label="The number of bootstrapping samples" help="An integer to indicate the number of bootstrapping samples to calculate the p-value of KS statistic." />
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
45 <param name="p_adjust_method" type="select" label="P-value adjustment method">
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
46 <option value="holm">holm</option>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
47 <option value="hochberg">hochberg</option>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
48 <option value="hommel">hommel</option>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
49 <option value="bonferroni">bonferroni</option>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
50 <option value="BH" selected="true" >BH</option>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
51 <option value="BY">BY</option>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
52 <option value="fdr">fdr</option>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
53 <option value="none">none</option>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
54 </param>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
55 </section>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
56
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
57 </inputs>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
58
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
59 <outputs>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
60 <data name="mogsa_result_geneSetScoreMatrix" format="csv" label="mogsa_result_geneSetScoreMatrix" />
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
61 <data name="mogsa_result_pvalueMatrix" format="csv" label="mogsa_result_pvalueMatrix" />
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
62 </outputs>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
63
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
64 <help><![CDATA[
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
65
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
66 .. class:: infomark
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
67
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
68 **What it does**
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
69
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
70 mogsa (Multiple Omics data integration and Gene Set Analysis) is an integrative multi-omics single-sample gene set analysis method.
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
71 The method learns a low dimensional representation of most variant correlated features (genes, proteins, etc.) across multiple omics data sets, transforms the features onto the same scale and calculates an integrated gene-set score from the most informative features in each data type. mogsa does not require filtering data to the intersection of features (gene IDs); therefore, all molecular features, including those that lack annotation may be included in the analysis.
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
72
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
73 ---------
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
74
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
75 ==========
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
76 **Inputs**
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
77 ==========
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
78
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
79 Basic Options
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
80 --------------
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
81
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
82 **Omics data**
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
83
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
84 **Omics data** refers to an Excel file with multiple sheets, each of them containing a different matrix of omics data. Each sheet name indicates the source of its omics data. The matrices must have the same rownames and colnames, where rows are genes and columns are samples.
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
85
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
86 **Design**
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
87
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
88 **Design** refers to a csv file, which has three columns for sample, label and color. The `sample` column corresponds to the samples in the omics data, and `label` gives the type of each `sample`.
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
89
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
90 **Gene Set**
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
91
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
92 **Gene Set** is an `rdata` file including a 'geneSet' variable (which is a geneSetCollection object built by the `GSEABase` package). You can use the **GeneSet from Msigdb/KEGG** tool to get this file. Make sure to use the same gene ID type as in the omics dataset (the gene IDs should be the same as in the rownames of the omics data).
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
93
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
94 **PC number**
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
95
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
96 In practice, one needs to determine how many PCs should be retained in the step of reconstructing the gene set score matrix. In our results, we plot a scree plot of the eigenvalues, which result from the multivariate analysis. The most suitable number of PCs can then be given as input. The default is 3.
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
97
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
98 Advanced Options
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
99 -----------------
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
100
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
101 **The weights of each separate dataset**
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
102 The weights of each separate dataset should be one of uniform - no weighting, lambda1 - weighted by the reverse of the first eigenvalue of each individual dataset, or inertia - weighted by the reverse of the total inertia.
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
103
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
104 MFA (multiple factorial analysis) corresponds to choosing `lambda1`.
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
105
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
106 **Preprocessing of rows of datasets**
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
107 Preprocessing of rows of datasets, should be one of none - no preprocessing, center - center only, center_ssq1 - center and scale (sum of squared values equals 1), center_ssqN - center and scale (sum of squared values equals the number of columns), center_ssqNm1 - center and scale (sum of squared values equals the number of columns - 1).
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
108
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
109 MFA (multiple factorial analysis) corresponds to choosing `center_ssq1`.
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
110
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
111 **The number of bootstrapping samples**
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
112 An integer to indicate the number of bootstrapping samples to calculate the p-value of the KS statistic. Default is 1000.
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
113
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
114 **P-value adjustment method**
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
115 Choose one method to adjust p-value among: `"BH"`, `"holm"`, `"hochberg"`, `"hommel"`, `"bonferroni"`, `"BY"`, `"fdr"` and `"none"`. Default is `"BH"`.
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
116
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
117 ------
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
118
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
119 ==========
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
120 **Output**
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
121 ==========
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
122
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
123 1. gene set score matrix,
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
124 2. P-value matrix
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
125
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
126 ]]></help>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
127
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
128 <citations>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
129 <citation type="doi">10.1074/mcp.TIR118.001251</citation>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
130 </citations>
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
131
d3eba0ce0908 Uploaded
mora-lab
parents:
diff changeset
132 </tool>