Mercurial > repos > iuc > rgcca
diff rgcca.xml @ 0:067d45e6caa9 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgcca commit 00f9e92845737e05a4afb1c93043f35b7e4ea771"
author | iuc |
---|---|
date | Tue, 12 Jan 2021 10:12:04 +0000 |
parents | |
children | 4e73ea176c34 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rgcca.xml Tue Jan 12 10:12:04 2021 +0000 @@ -0,0 +1,359 @@ +<tool id="rgcca" name="RGCCA" version="@TOOL_VERSION@+galaxy0"> + + <description>performs multiblock data analysis of several sets of variables (blocks) observed on the same group of individuals.</description> + + <macros> + <import>macro.xml</import> + </macros> + + <edam_topics> + <edam_topic>topic_2269</edam_topic> + </edam_topics> + + <edam_operations> + <edam_operation>operation_2945</edam_operation> + <edam_operation>operation_3465</edam_operation> + <edam_operation>operation_0337</edam_operation> + </edam_operations> + + <requirements> + <requirement type="package" version="@TOOL_VERSION@">rgccacmd</requirement> + </requirements> + + <command detect_errors="exit_code"><![CDATA[ + #set data_paths = ",".join([str(_.file_name) for _ in $blocks]) + #set data_names = ",".join([str(_.element_identifier).replace(',', '_') for _ in $blocks]) + Rscript '$__tool_directory__/launcher.R' + --datasets '${data_paths}' + --names '${data_names}' + --o1 '$individual_plot' --o2 '$corcircle' --o3 '$top_variables' --o4 '$ave' --o5 '$design' --o6 '$individual_table' --o7 '$variable_table' --o8 '$rdata' + $parse.header + --separator $parse.separator + $analyse.superblock + $analyse.scale + #if $analyse.tau.bool == 'false' + --penalty $analyse.tau.value + #else + --penalty $analyse.tau.bool + #end if + --ncomp $analyse.ncomp + --scheme $analyse.scheme + #if $analyse.method.family == '1' + --type pca + #else + --type $analyse.method.type + #end if + #if $analyse.connection + --connection $analyse.connection + #end if + #if $analyse.supervised.learning_mode == 'supervised' + --response $analyse.supervised.block_response + #end if + #if $graphic.response + --group $graphic.response + #end if + --compx $graphic.compx + --compy $graphic.compy + --nmark $graphic.nmark + $graphic.text + --block $graphic.blockx + --block_y $graphic.blocky + ]]></command> + + <inputs> + <param name="blocks" type="data" format="tsv,tabular,txt,csv" multiple="true" optional="false" label = "Load blocks" + help="TSV file containing a matrix with: (i) quantitative values only (decimal should be separated by '.'), (ii) the samples in lines (should be labelled in the 1rst column) and (iii) variables in columns (should have a header)."/> + + <section name="parse" title="Advanced parsing" help="By default, on tabulated files with a header."> + <param name="header" type="boolean" truevalue="" falsevalue="-H" checked="true" label="Consider the first row as header of columns" help="Used for both blocks and color files."/> + <param name="separator" type="select" display="radio" label="Column separator" help="Character used to separate the column (for all blocks, connection and color files)."> + <option value="1" selected="true">Tabulation</option> + <option value="2">Semicolon</option> + </param> + </section> + + <section name="analyse" title="Advanced analysis" + help="By default, the analysis: is a Regularised Generalised Canonical Correlation Analysis, scales the blocks, uses a superblock with a factorial scheme function, a tau equals to one and two components for each block."> + + <param name="ncomp" type="integer" label="Number of component" value="2" min="2" max="5" + help="The number of component to use in the analysis for each block (should not be greater than the minimum number of variable among the blocks)."/> + + <param name="scale" type="boolean" truevalue="" falsevalue="--scale" checked="true" label="Scale the blocks" + help="A data centering step is always performed. If activated, each block is normalised and divided by the square root of its number of variables."/> + + <conditional name="method"> + + <param name="family" type="select" label="Analysis method"> + <option value="1">One block</option> + <option value="2">Two blocks</option> + <option value="m" selected="true">Multiple blocks</option> + <option value="ms">Multiple blocks with superblock</option> + </param> + + <when value="2"> + <param name="type" type="select" label=" "> + <option value="pls">Partial Least Squares Regression</option> + <option value="cca">Canonical Correlation Analysis</option> + <option value="ifa">Interbattery Factor Analysis</option> + <option value="ra">Redundancy analysis</option> + </param> + </when> + + <when value="m"> + <param name="type" type="select" label=" "> + <option value="rgcca">Regularized Generalized CCA</option> + <option value="sgcca">Sparse Generalized CCA</option> + <option value="sumcor">SUM of CORrelations method</option> + <option value="ssqcor">Sum of SQuared CORrelations method</option> + <option value="sabscor">Sum of ABSolute value CORrelations method</option> + <option value="sumcov">SUM of COVariances method</option> + <option value="ssqcov">Sum of SQuared COVariances method</option> + <option value="sabscov">Sum of ABSolute value COVariances method</option> + <option value="maxbet">MAXBET</option> + <option value="maxbet-b">MAXBET-B</option> + </param> + </when> + + <when value="ms"> + <param name="type" type="select" label=" "> + <option value="gcca">Generalized CCA</option> + <option value="hpca">Hierarchical PCA</option> + <option value="mfa">Multiple Factor Analysis</option> + </param> + </when> + + <when value="1"/> + + </conditional> + + <param name="connection" optional="true" type="data" format="tsv,tabular,txt,csv" label="Load the design matrix (if superblock or supervised disabled)" + help="TSV file without header and without row names. This file describes the connections between the blocks. It should contain 1 (if two blocks are related) or 0 values otherwise. The columns are separated by tabulations. It is a symmetric matrix with the same dimension as the number of blocks."/> + + <param name="superblock" type="boolean" truevalue="" falsevalue="--superblock" checked="true" label="Use a superblock" + help="A block defined as the concatenation of all the other blocks. The space spanned by global components is viewed as a compromise space that integrated all the modalities and facilitates the visualization of the results and their interpretation. If disabled, all blocks are assumed to be connected or a connection file could be used."/> + + <conditional name="supervised"> + <param name="learning_mode" type="select" display="radio" label="Learning mode"> + <option value="unsupervised">Unsupervised</option> + <option value="supervised">Supervised</option> + </param> + <when value="supervised"> + <param name="block_response" type="integer" value="1" min="1" max="10" label="Use a block as response (supervised analysis)" help="@BLOCK_RULES@ By default, the first block is selected."/> + </when> + <when value="unsupervised"/> + </conditional> + + <conditional name="tau"> + <param name="bool" type="select" display="radio" label="Tau selection" + help="For RGCCA, a regularization parameter for each block (i.e., tau) [default: 1]. Tau varies from 0 (maximizing the correlation) to 1 (maximizing the covariance). For SGCCA, tau is automatically set to 1 and a shrinkage parameter can be defined instead for automatic variable selection, varying from the square root of the variable number (the fewest selected variables) to 1 (all the variables are included)."> + <option value="false">Manual</option> + <option value="optimal">Optimal</option> + </param> + <when value="false"> + <param name="value" type="float" label=" " value="1" min="0" max="1"/> + </when> + <when value="optimal"/> + </conditional> + + <param name="scheme" type="select" label="Scheme function" help="Link (i.e. scheme) function for covariance maximization is calculated with: the identity function (horst scheme), +the absolute values (centroid scheme), the squared values (factorial scheme). Only, the horst scheme penalizes structural +negative correlation. The factorial scheme discriminates more strongly the blocks than the centroid one."> + <option value="1">Horst : f(x)</option> + <option value="2" selected="true">Factorial : f(x)^2</option> + <option value="3">Centroid : f|x|</option> + <option value="4">Other: f(x)^4</option> + </param> + + </section> + + <section name="graphic" title="Advanced graphic" help="By default, the x-axis and y-axis are respectively the first and the second components, the number of top variables is 100 and a superblock is used."> + <param name="response" optional="true" type="data" format="tsv,tabular,txt,csv" label="Color the individual plot with a response variable" + help="A TSV file containing either: (i) an only column with a qualitative or a quantitative variable; (ii) multiple columns corresponding to a disjunctive table."/> + <param name="text" type="boolean" truevalue="" falsevalue="--text" checked="true" label="Display the names of the points (in biplots)"/> + <param name="compx" type="integer" label="Component for the X-axis" help="The component used in the X-axis for biplots and the only component used for top variable plot. @COMP_RULES@" value="1" min="1" max="5"/> + <param name="compy" type="integer" label="Component for the Y-axis" help="The component used in the Y-axis for biplots. @COMP_RULES@" value="2" min="1" max="5"/> + <param name="blockx" type="integer" value="0" min="0" max="10" label="Visualise this block" help="Block used in the X-axis for individual plot and the only block used for corcircle and top variable plots. @BLOCK_RULES@"/> + <param name="blocky" type="integer" value="0" min="0" max="10" label="Visualise this block for the Y-axis (in individual plot)" help="0 corresponds to the superblock (or the last block loaded), @BLOCK_RULES@ By default, the superblock is selected."/> + <param name="nmark" type="integer" label="Number of top variables" value="100" min="10" max="300"/> + </section> + + <param name="output_selector" type="select" multiple="true" label="Outputs"> + <option value="individuals" selected="true">Individual plot</option> + <option value="corcircle" selected = "true">Corcircle plot</option> + <option value="top_variables">Top variables plot</option> + <option value="ave">Averages plot</option> + <option value="design">Design plot</option> + <option value="individual_table" selected="true">Individual table</option> + <option value="variable_table" selected="true">Variable table</option> + <option value="rdata">RData file</option> + </param> + </inputs> + + <outputs> + <data name="individual_plot" label="${tool.name} on ${on_string}: individuals.pdf" format="pdf"> + <filter>"individuals" in output_selector</filter> + </data> + <data name="corcircle" label="${tool.name} on ${on_string}: corcircle.pdf" format="pdf"> + <filter>"corcircle" in output_selector</filter> + </data> + <data name="top_variables" label="${tool.name} on ${on_string}: top_variables.pdf" format="pdf"> + <filter>"top_variables" in output_selector</filter> + </data> + <data name="ave" label="${tool.name} on ${on_string}: ave.pdf" format="pdf"> + <filter>"ave" in output_selector</filter> + </data> + <data name="design" label="${tool.name} on ${on_string}: design.pdf" format="pdf"> + <filter>"design" in output_selector</filter> + </data> + <data name="individual_table" label="${tool.name} on ${on_string}: individuals.tsv" format="tsv"> + <filter>"individual_table" in output_selector</filter> + </data> + <data name="variable_table" label="${tool.name} on ${on_string}: variables.tsv" format="tsv"> + <filter>"variable_table" in output_selector</filter> + </data> + <data name="rdata" label="${tool.name} on ${on_string}: rgcca.result.RData" format="rdata"> + <filter>"rdata" in output_selector</filter> + </data> + </outputs> + + <tests> + + <test expect_num_outputs="8" expect_exit_code="0"> + <expand macro="output_tests" path="1block"/> + <param name="blocks" value="agriculture.tsv" ftype = "tsv"/> + <output name="individual_table"> + <assert_contents> + <has_n_columns n="4"/> + <has_line_matching + expression='"agriculture.axis1"\s"agriculture.axis2"\s"superblock.axis1"\s"superblock.axis2"'/> + <has_line_matching + expression='^.+(\s\-?\d+.\d+){4}$'/> + </assert_contents> + </output> + </test> + + <test expect_num_outputs="8" expect_exit_code="0"> + <expand macro="output_tests" path="3blocks_connection"/> + <expand macro="output_tests_3blocks"/> + <section name="analyse"> + <param name="connection" value="connection.tsv" ftype = "tsv"/> + <param name="superblock" value="false"/> + </section> + <assert_command> + <has_text text="-connection"/> + <has_text text="--superblock"/> + </assert_command> + </test> + + <test expect_num_outputs="8" expect_exit_code="0"> + <expand macro="output_tests" path="3blocks_supervised"/> + <expand macro="output_tests_3blocks"/> + <section name="analyse"> + <param name="superblock" value="false"/> + <conditional name="supervised" > + <param name="learning_mode" value="supervised"/> + <param name="block_response" value="3"/> + </conditional> + </section> + <assert_command> + <has_text text="--response 3"/> + <has_text text="--superblock"/> + </assert_command> + </test> + + <test expect_num_outputs="8" expect_exit_code="0"> + <expand macro="output_tests" path="3blocks"/> + <expand macro="output_tests_3blocks"/> + </test> + + <test expect_num_outputs="8" expect_exit_code="0"> + <expand macro="output_tests" path="3blocks_sgcca"/> + <expand macro="output_tests_3blocks"/> + <section name="analyse"> + <conditional name="method"> + <param name="family" value="m"/> + <param name="type" value="sgcca"/> + </conditional> + </section> + <assert_command> + <has_text text="sgcca"/> + </assert_command> + </test> + + <test expect_num_outputs="8" expect_exit_code="0"> + <expand macro="output_tests" path="2blocks" compx="3" compy="1"/> + <param name="blocks" value="agriculture.tsv,politic.tsv"/> + <section name="analyse"> + <param name="scale" value="false"/> + <conditional name="tau"> + <param name="bool" value="false"/> + <param name="value" value="0"/> + </conditional> + <param name="scheme" value="3"/> + <param name="ncomp" value="3"/> + <conditional name="method"> + <param name="family" value="2"/> + <param name="type" value="pls"/> + </conditional> + </section> + <section name="graphic"> + <param name="response" value="political_system.tsv" ftype = "tsv"/> + <param name="text" value="false"/> + <param name="compx" value="3"/> + <param name="compy" value="1"/> + <param name="blockx" value="2"/> + <param name="blocky" value="1"/> + <param name="nmark" value="11"/> + </section> + <assert_command> + <has_text text="pls"/> + <has_text text="--group"/> + </assert_command> + </test> + + </tests> +<help> + +================================== +ABOUT +================================== + + +**Author:** +Etienne CAMENEN + + +**Contact:** +arthur.tenenhaus@centralesupelec.fr + + +**R package:** +The RGCCA package is available from the CRAN repository (https://cran.r-project.org/web/packages/RGCCA). + +--------------------------------------------------- + +================================== +R/SGCCA +================================== + +A user-friendly multi-blocks analysis (Regularized Generalized Canonical Correlation Analysis, RGCCA) as described in [1] and [2] with all default settings predefined. The software produces figures to explore the analysis' results: individuals and variables projected on two components of the multi-block analysis, list of top variables and explained variance in the model. + +**Working example** + + | From Russett data (RGCCA package): https://github.com/rgcca-factory/RGCCA/tree/master/inst/extdata + | Use *agriculture.tsv* as a block. Add *industry.tsv* and *politic.tsv* as new blocks. *connection.tsv* could be used as a design matrix and *political_system.tsv* as a response variable respectively in analysis and graphic settings. + +**Documentation** + +- RGCCA: https://cran.r-project.org/web/packages/RGCCA/vignettes/vignette_RGCCA.pdf +- accepted input / output formats: https://github.com/rgcca-factory/RGCCA#input-files +<!-- - tutorial: https://github.com/BrainAndSpineInstitute/rgcca_galaxy/blob/release/0.2/README.md--> + +</help> + + <citations> + <citation type="doi">10.1007/s11336-017-9573-x</citation> + <citation type="doi">10.1007/s11336-011-9206-8</citation> + </citations> + +</tool>