comparison principal_component_analysis.xml @ 1:2e7d47c0b027 draft

"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
author malex
date Mon, 08 Mar 2021 22:04:06 +0000
parents
children
comparison
equal deleted inserted replaced
0:b54326490b4d 1:2e7d47c0b027
1 <tool id="secimtools_principal_component_analysis" name="Principal Component Analysis (PCA)" version="@WRAPPER_VERSION@">
2 <description>for visual summaries of the components.</description>
3 <macros>
4 <import>macros.xml</import> <!-- NOTE(review): presumably supplies the "requirements" and "citations" macros and the @...@ tokens used in this file; confirm against macros.xml -->
5 </macros>
6 <expand macro="requirements" />
7 <command detect_errors="exit_code"><![CDATA[
8 principal_component_analysis.py
9 --input '$input'
10 --design '$design'
11 --ID '$uniqID'
12 --load_out '$loadings'
13 --score_out '$scores'
14 --summary_out '$summary'
15 --figure '$figures'
16 ## Every value is single-quoted so paths and column names with spaces or shell metacharacters stay one argument; the group flag is added only when a group column name was entered.
17 #if $group
18 --group '$group'
19 #end if
20 ]]></command>
21 <inputs> <!-- Two tabular datasets plus the column names passed as the ID and group arguments. -->
22 <param name="input" type="data" format="tabular" label="Wide Dataset" help="Input your tab-separated wide format dataset. If file is not tab separated see TIP below."/>
23 <param name="design" type="data" format="tabular" label="Design File" help="Input your design file (tab-separated). Note you need a 'sampleID' column. If not tab separated see TIP below."/>
24 <param name="uniqID" type="text" size="30" value="" label="Unique Feature ID" help="Name of the column in your wide dataset that has unique identifiers."><validator type="empty_field" message="Enter the name of the unique feature ID column."/></param> <!-- Required: an empty value is rejected in the form rather than failing later on the command line. -->
25 <param name="group" type="text" size="30" label="Group/Treatment [Optional]" help="Name of the column in your design file that contains group classifications."/> <!-- May be left empty; the command template then omits the group flag. -->
26 </inputs>
27 <outputs> <!-- One dataset per output path handed to the script: three tabular tables and one PDF. -->
28 <data name="loadings" format="tabular" label="${tool.name} on ${on_string}: loadings" />
29 <data name="scores" format="tabular" label="${tool.name} on ${on_string}: scores" />
30 <data name="summary" format="tabular" label="${tool.name} on ${on_string}: summary" />
31 <data name="figures" format="pdf" label="${tool.name} on ${on_string}: scatter plots" />
32 </outputs>
33 <tests>
34 <test> <!-- Single run on the ST000006 fixtures with a group column supplied. -->
35 <param name="input" value="ST000006_data.tsv" />
36 <param name="design" value="ST000006_design.tsv" />
37 <param name="uniqID" value="Retention_Index" />
38 <param name="group" value="White_wine_type_and_source" />
39 <output name="loadings" file="ST000006_principal_component_analysis_load_out.tsv" />
40 <output name="scores" file="ST000006_principal_component_analysis_score_out.tsv" />
41 <output name="summary" file="ST000006_principal_component_analysis_summary_out.tsv" />
42 <output name="figures" compare="sim_size" delta="10000" file="ST000006_principal_component_analysis_figure.pdf" /> <!-- The PDF is checked by approximate size (sim_size, delta 10000), not by content. -->
43 </test>
44 </tests>
45 <help><![CDATA[
46
47 @TIP_AND_WARNING@
48
49 **Tool Description**
50
51 The tool performs principal component analysis (PCA) of the data.
52 Visual summaries are provided in the form of 2D and 3D scatter plots for the first three principal components.
53 Samples in the scatter plots are colored based on the group classification.
54
55 --------------------------------------------------------------------------------
56
57 **Note**
58
59 - This tool currently treats all variables as continuous numeric variables. Running the tool on categorical variables might result in incorrect results.
60 - Rows containing non-numeric (or missing) data in any of the chosen columns will be excluded from the analysis.
61
62 --------------------------------------------------------------------------------
63
64 **Input**
65
66 - Two input datasets are required.
67
68 @WIDE@
69
70 **NOTE:** The sample IDs must match the sample IDs in the Design File (below).
71 Extra columns will automatically be ignored.
72
73
74 @METADATA@
75
76 @UNIQID@
77
78 @GROUP_OPTIONAL@
79
80 --------------------------------------------------------------------------------
81
82 **Output**
83
84 Four different outputs are produced by the Principal Component Analysis tool:
85
86 (1) a TSV file containing eigenvectors/variable loadings
87 (2) a TSV file containing scores of input data on principal components
88 (3) a TSV file with the summary for each component
89 (4) and a PDF file of scatter plots of the first three principal components
90
91 There are a total of four scatter plots: three pairwise plots for the first three components and a single 3D plot of the first three components.
92
93 ]]></help>
94 <expand macro="citations"/>
95 </tool>