comparison rgcca.xml @ 0:067d45e6caa9 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgcca commit 00f9e92845737e05a4afb1c93043f35b7e4ea771"
author iuc
date Tue, 12 Jan 2021 10:12:04 +0000
parents
children 4e73ea176c34
comparison
equal deleted inserted replaced
-1:000000000000 0:067d45e6caa9
1 <tool id="rgcca" name="RGCCA" version="@TOOL_VERSION@+galaxy0">
2
3 <description>performs multiblock data analysis of several sets of variables (blocks) observed on the same group of individuals.</description>
4
5 <macros>
6 <import>macro.xml</import>
7 </macros>
8
9 <edam_topics>
10 <edam_topic>topic_2269</edam_topic>
11 </edam_topics>
12
13 <edam_operations>
14 <edam_operation>operation_2945</edam_operation>
15 <edam_operation>operation_3465</edam_operation>
16 <edam_operation>operation_0337</edam_operation>
17 </edam_operations>
18
19 <requirements>
20 <requirement type="package" version="@TOOL_VERSION@">rgccacmd</requirement>
21 </requirements>
22 <!-- Assembles the launcher.R command line: block paths and names are comma-joined (commas and single quotes in names become underscores so the quoted argument stays intact), and the optional design and response file paths are single-quoted. -->
23 <command detect_errors="exit_code"><![CDATA[
24 #set data_paths = ",".join([str(_.file_name) for _ in $blocks])
25 #set data_names = ",".join([str(_.element_identifier).replace(',', '_').replace("'", '_') for _ in $blocks])
26 Rscript '$__tool_directory__/launcher.R'
27 --datasets '${data_paths}'
28 --names '${data_names}'
29 --o1 '$individual_plot' --o2 '$corcircle' --o3 '$top_variables' --o4 '$ave' --o5 '$design' --o6 '$individual_table' --o7 '$variable_table' --o8 '$rdata'
30 $parse.header
31 --separator $parse.separator
32 $analyse.superblock
33 $analyse.scale
34 #if $analyse.tau.bool == 'false'
35 --penalty $analyse.tau.value
36 #else
37 --penalty $analyse.tau.bool
38 #end if
39 --ncomp $analyse.ncomp
40 --scheme $analyse.scheme
41 #if $analyse.method.family == '1'
42 --type pca
43 #else
44 --type $analyse.method.type
45 #end if
46 #if $analyse.connection
47 --connection '$analyse.connection'
48 #end if
49 #if $analyse.supervised.learning_mode == 'supervised'
50 --response $analyse.supervised.block_response
51 #end if
52 #if $graphic.response
53 --group '$graphic.response'
54 #end if
55 --compx $graphic.compx
56 --compy $graphic.compy
57 --nmark $graphic.nmark
58 $graphic.text
59 --block $graphic.blockx
60 --block_y $graphic.blocky
61 ]]></command>
62
63 <inputs>
64 <param name="blocks" type="data" format="tsv,tabular,txt,csv" multiple="true" optional="false" label = "Load blocks"
65 help="TSV file containing a matrix with: (i) quantitative values only (the decimal separator should be '.'), (ii) the samples in rows (labelled in the first column) and (iii) the variables in columns (with a header)."/>
66 <!-- Parsing options. "header" is an inverted flag: checked (default) adds nothing to the command line, unchecked adds the falsevalue "-H". The selected separator code (1 or 2) is passed as the separator argument. -->
67 <section name="parse" title="Advanced parsing" help="By default, on tabulated files with a header.">
68 <param name="header" type="boolean" truevalue="" falsevalue="-H" checked="true" label="Consider the first row as header of columns" help="Used for both blocks and color files."/>
69 <param name="separator" type="select" display="radio" label="Column separator" help="Character used to separate the column (for all blocks, connection and color files).">
70 <option value="1" selected="true">Tabulation</option>
71 <option value="2">Semicolon</option>
72 </param>
73 </section>
74
75 <section name="analyse" title="Advanced analysis"
76 help="By default, the analysis is a Regularised Generalised Canonical Correlation Analysis that scales the blocks and uses a superblock with a factorial scheme function, a tau equal to one and two components for each block.">
77
78 <param name="ncomp" type="integer" label="Number of components" value="2" min="2" max="5"
79 help="The number of components to use in the analysis for each block (should not be greater than the minimum number of variables among the blocks)."/>
80 <!-- Inverted flag: checked (default) adds nothing to the command line; unchecked emits the falsevalue. NOTE(review): presumably launcher.R reads that flag as "do not scale"; confirm. -->
81 <param name="scale" type="boolean" truevalue="" falsevalue="--scale" checked="true" label="Scale the blocks"
82 help="A data centering step is always performed. If activated, each block is normalised and divided by the square root of its number of variables."/>
83
84 <conditional name="method">
85
86 <param name="family" type="select" label="Analysis method">
87 <option value="1">One block</option>
88 <option value="2">Two blocks</option>
89 <option value="m" selected="true">Multiple blocks</option>
90 <option value="ms">Multiple blocks with superblock</option>
91 </param>
92
93 <when value="2">
94 <param name="type" type="select" label=" ">
95 <option value="pls">Partial Least Squares Regression</option>
96 <option value="cca">Canonical Correlation Analysis</option>
97 <option value="ifa">Interbattery Factor Analysis</option>
98 <option value="ra">Redundancy analysis</option>
99 </param>
100 </when>
101
102 <when value="m">
103 <param name="type" type="select" label=" ">
104 <option value="rgcca">Regularized Generalized CCA</option>
105 <option value="sgcca">Sparse Generalized CCA</option>
106 <option value="sumcor">SUM of CORrelations method</option>
107 <option value="ssqcor">Sum of SQuared CORrelations method</option>
108 <option value="sabscor">Sum of ABSolute value CORrelations method</option>
109 <option value="sumcov">SUM of COVariances method</option>
110 <option value="ssqcov">Sum of SQuared COVariances method</option>
111 <option value="sabscov">Sum of ABSolute value COVariances method</option>
112 <option value="maxbet">MAXBET</option>
113 <option value="maxbet-b">MAXBET-B</option>
114 </param>
115 </when>
116
117 <when value="ms">
118 <param name="type" type="select" label=" ">
119 <option value="gcca">Generalized CCA</option>
120 <option value="hpca">Hierarchical PCA</option>
121 <option value="mfa">Multiple Factor Analysis</option>
122 </param>
123 </when>
124
125 <when value="1"/>
126
127 </conditional>
128
129 <param name="connection" optional="true" type="data" format="tsv,tabular,txt,csv" label="Load the design matrix (if superblock or supervised disabled)"
130 help="TSV file without header and without row names. This file describes the connections between the blocks. It should contain 1 (if two blocks are related) or 0 values otherwise. The columns are separated by tabulations. It is a symmetric matrix with the same dimension as the number of blocks."/>
131 <!-- Inverted flag: checked (default) adds nothing to the command line; unchecked emits the falsevalue. NOTE(review): presumably launcher.R reads that flag as "no superblock"; confirm. -->
132 <param name="superblock" type="boolean" truevalue="" falsevalue="--superblock" checked="true" label="Use a superblock"
133 help="A block defined as the concatenation of all the other blocks. The space spanned by global components is viewed as a compromise space that integrated all the modalities and facilitates the visualization of the results and their interpretation. If disabled, all blocks are assumed to be connected or a connection file could be used."/>
134
135 <conditional name="supervised">
136 <param name="learning_mode" type="select" display="radio" label="Learning mode">
137 <option value="unsupervised">Unsupervised</option>
138 <option value="supervised">Supervised</option>
139 </param>
140 <when value="supervised">
141 <param name="block_response" type="integer" value="1" min="1" max="10" label="Use a block as response (supervised analysis)" help="@BLOCK_RULES@ By default, the first block is selected."/>
142 </when>
143 <when value="unsupervised"/>
144 </conditional>
145
146 <conditional name="tau">
147 <param name="bool" type="select" display="radio" label="Tau selection"
148 help="For RGCCA, a regularization parameter for each block (i.e., tau) [default: 1]. Tau varies from 0 (maximizing the correlation) to 1 (maximizing the covariance). For SGCCA, tau is automatically set to 1 and a shrinkage parameter can be defined instead for automatic variable selection, varying from the square root of the variable number (the fewest selected variables) to 1 (all the variables are included).">
149 <option value="false">Manual</option>
150 <option value="optimal">Optimal</option>
151 </param>
152 <when value="false">
153 <param name="value" type="float" label=" " value="1" min="0" max="1"/>
154 </when>
155 <when value="optimal"/>
156 </conditional>
157
158 <param name="scheme" type="select" label="Scheme function" help="The link (i.e. scheme) function for covariance maximization is calculated with: the identity function (Horst scheme),
159 the absolute values (centroid scheme) or the squared values (factorial scheme). Only the Horst scheme penalizes structural
160 negative correlation. The factorial scheme discriminates the blocks more strongly than the centroid one.">
161 <option value="1">Horst : f(x)</option>
162 <option value="2" selected="true">Factorial : f(x)^2</option>
163 <option value="3">Centroid : f|x|</option>
164 <option value="4">Other: f(x)^4</option>
165 </param>
166
167 </section>
168
169 <section name="graphic" title="Advanced graphic" help="By default, the x-axis and y-axis are respectively the first and the second components, the number of top variables is 100 and a superblock is used.">
170 <param name="response" optional="true" type="data" format="tsv,tabular,txt,csv" label="Color the individual plot with a response variable"
171 help="A TSV file containing either: (i) a single column with a qualitative or a quantitative variable; or (ii) multiple columns corresponding to a disjunctive table."/>
172 <param name="text" type="boolean" truevalue="" falsevalue="--text" checked="true" label="Display the names of the points (in biplots)"/>
173 <param name="compx" type="integer" label="Component for the X-axis" help="The component used in the X-axis for biplots and the only component used for top variable plot. @COMP_RULES@" value="1" min="1" max="5"/>
174 <param name="compy" type="integer" label="Component for the Y-axis" help="The component used in the Y-axis for biplots. @COMP_RULES@" value="2" min="1" max="5"/>
175 <param name="blockx" type="integer" value="0" min="0" max="10" label="Visualise this block" help="Block used in the X-axis for individual plot and the only block used for corcircle and top variable plots. @BLOCK_RULES@"/>
176 <param name="blocky" type="integer" value="0" min="0" max="10" label="Visualise this block for the Y-axis (in individual plot)" help="0 corresponds to the superblock (or the last block loaded), @BLOCK_RULES@ By default, the superblock is selected."/>
177 <param name="nmark" type="integer" label="Number of top variables" value="100" min="10" max="300"/>
178 </section>
179
180 <param name="output_selector" type="select" multiple="true" label="Outputs">
181 <option value="individuals" selected="true">Individual plot</option>
182 <option value="corcircle" selected = "true">Corcircle plot</option>
183 <option value="top_variables">Top variables plot</option>
184 <option value="ave">Averages plot</option>
185 <option value="design">Design plot</option>
186 <option value="individual_table" selected="true">Individual table</option>
187 <option value="variable_table" selected="true">Variable table</option>
188 <option value="rdata">RData file</option>
189 </param>
190 </inputs>
191 <!-- Each output is kept only when its key is selected in the output_selector parameter (see the filters); the command still passes all eight output paths (o1 to o8) to launcher.R. -->
192 <outputs>
193 <data name="individual_plot" label="${tool.name} on ${on_string}: individuals.pdf" format="pdf">
194 <filter>"individuals" in output_selector</filter>
195 </data>
196 <data name="corcircle" label="${tool.name} on ${on_string}: corcircle.pdf" format="pdf">
197 <filter>"corcircle" in output_selector</filter>
198 </data>
199 <data name="top_variables" label="${tool.name} on ${on_string}: top_variables.pdf" format="pdf">
200 <filter>"top_variables" in output_selector</filter>
201 </data>
202 <data name="ave" label="${tool.name} on ${on_string}: ave.pdf" format="pdf">
203 <filter>"ave" in output_selector</filter>
204 </data>
205 <data name="design" label="${tool.name} on ${on_string}: design.pdf" format="pdf">
206 <filter>"design" in output_selector</filter>
207 </data>
208 <data name="individual_table" label="${tool.name} on ${on_string}: individuals.tsv" format="tsv">
209 <filter>"individual_table" in output_selector</filter>
210 </data>
211 <data name="variable_table" label="${tool.name} on ${on_string}: variables.tsv" format="tsv">
212 <filter>"variable_table" in output_selector</filter>
213 </data>
214 <data name="rdata" label="${tool.name} on ${on_string}: rgcca.result.RData" format="rdata">
215 <filter>"rdata" in output_selector</filter>
216 </data>
217 </outputs>
218
219 <tests>
220
221 <test expect_num_outputs="8" expect_exit_code="0">
222 <expand macro="output_tests" path="1block"/>
223 <param name="blocks" value="agriculture.tsv" ftype = "tsv"/>
224 <output name="individual_table">
225 <assert_contents>
226 <has_n_columns n="4"/>
227 <has_line_matching
228 expression='"agriculture.axis1"\s"agriculture.axis2"\s"superblock.axis1"\s"superblock.axis2"'/>
229 <has_line_matching
230 expression='^.+(\s\-?\d+.\d+){4}$'/>
231 </assert_contents>
232 </output>
233 </test>
234 <!-- Three blocks with an explicit design matrix and the superblock option unchecked; the generated command line must carry both the connection and the superblock flags. -->
235 <test expect_num_outputs="8" expect_exit_code="0">
236 <expand macro="output_tests" path="3blocks_connection"/>
237 <expand macro="output_tests_3blocks"/>
238 <section name="analyse">
239 <param name="connection" value="connection.tsv" ftype="tsv"/>
240 <param name="superblock" value="false"/>
241 </section>
242 <assert_command>
243 <has_text text="--connection"/>
244 <has_text text="--superblock"/>
245 </assert_command>
246 </test>
247
248 <test expect_num_outputs="8" expect_exit_code="0">
249 <expand macro="output_tests" path="3blocks_supervised"/>
250 <expand macro="output_tests_3blocks"/>
251 <section name="analyse">
252 <param name="superblock" value="false"/>
253 <conditional name="supervised" >
254 <param name="learning_mode" value="supervised"/>
255 <param name="block_response" value="3"/>
256 </conditional>
257 </section>
258 <assert_command>
259 <has_text text="--response 3"/>
260 <has_text text="--superblock"/>
261 </assert_command>
262 </test>
263
264 <test expect_num_outputs="8" expect_exit_code="0">
265 <expand macro="output_tests" path="3blocks"/>
266 <expand macro="output_tests_3blocks"/>
267 </test>
268
269 <test expect_num_outputs="8" expect_exit_code="0">
270 <expand macro="output_tests" path="3blocks_sgcca"/>
271 <expand macro="output_tests_3blocks"/>
272 <section name="analyse">
273 <conditional name="method">
274 <param name="family" value="m"/>
275 <param name="type" value="sgcca"/>
276 </conditional>
277 </section>
278 <assert_command>
279 <has_text text="sgcca"/>
280 </assert_command>
281 </test>
282
283 <test expect_num_outputs="8" expect_exit_code="0">
284 <expand macro="output_tests" path="2blocks" compx="3" compy="1"/>
285 <param name="blocks" value="agriculture.tsv,politic.tsv"/>
286 <section name="analyse">
287 <param name="scale" value="false"/>
288 <conditional name="tau">
289 <param name="bool" value="false"/>
290 <param name="value" value="0"/>
291 </conditional>
292 <param name="scheme" value="3"/>
293 <param name="ncomp" value="3"/>
294 <conditional name="method">
295 <param name="family" value="2"/>
296 <param name="type" value="pls"/>
297 </conditional>
298 </section>
299 <section name="graphic">
300 <param name="response" value="political_system.tsv" ftype = "tsv"/>
301 <param name="text" value="false"/>
302 <param name="compx" value="3"/>
303 <param name="compy" value="1"/>
304 <param name="blockx" value="2"/>
305 <param name="blocky" value="1"/>
306 <param name="nmark" value="11"/>
307 </section>
308 <assert_command>
309 <has_text text="pls"/>
310 <has_text text="--group"/>
311 </assert_command>
312 </test>
313
314 </tests>
315 <help>
316
317 ==================================
318 ABOUT
319 ==================================
320
321
322 **Author:**
323 Etienne CAMENEN
324
325
326 **Contact:**
327 arthur.tenenhaus@centralesupelec.fr
328
329
330 **R package:**
331 The RGCCA package is available from the CRAN repository (https://cran.r-project.org/web/packages/RGCCA).
332
333 ---------------------------------------------------
334
335 ==================================
336 R/SGCCA
337 ==================================
338
339 A user-friendly multi-block analysis (Regularized Generalized Canonical Correlation Analysis, RGCCA) as described in [1] and [2], with all default settings predefined. The software produces figures to explore the analysis results: individuals and variables projected on two components of the multi-block analysis, a list of top variables and the explained variance in the model.
340
341 **Working example**
342
343 | From Russett data (RGCCA package): https://github.com/rgcca-factory/RGCCA/tree/master/inst/extdata
344 | Use *agriculture.tsv* as a block. Add *industry.tsv* and *politic.tsv* as new blocks. *connection.tsv* can be used as a design matrix (in the analysis settings) and *political_system.tsv* as a response variable (in the graphic settings).
345
346 **Documentation**
347
348 - RGCCA: https://cran.r-project.org/web/packages/RGCCA/vignettes/vignette_RGCCA.pdf
349 - accepted input / output formats: https://github.com/rgcca-factory/RGCCA#input-files
350 <!-- - tutorial: https://github.com/BrainAndSpineInstitute/rgcca_galaxy/blob/release/0.2/README.md-->
351
352 </help>
353
354 <citations>
355 <citation type="doi">10.1007/s11336-017-9573-x</citation>
356 <citation type="doi">10.1007/s11336-011-9206-8</citation>
357 </citations>
358
359 </tool>