<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy tool wrapper that runs principal_component_analysis.py.

  Inputs:  a wide-format tabular dataset, a tabular design file, the name of
           the unique feature ID column, and an optional group column name.
  Outputs: three tabular datasets (loadings, scores, summary) and one PDF of
           scatter plots.

  NOTE(review): the @WRAPPER_VERSION@ token, the "requirements" and
  "citations" macros, and the @...@ tokens used in the help text are
  presumably defined in macros.xml; confirm there.
-->
<tool id="secimtools_principal_component_analysis" name="Principal Component Analysis (PCA)" version="@WRAPPER_VERSION@">
    <description>for visual summaries of the components.</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!--
      Every substituted value is single-quoted so that a dataset path or a
      user-typed column name containing spaces or shell metacharacters reaches
      the script as exactly one argument.
      The group option is only appended when the "group" text field is
      non-empty.
    -->
    <command detect_errors="exit_code"><![CDATA[
        principal_component_analysis.py
            --input '$input'
            --design '$design'
            --ID '$uniqID'
            --load_out '$loadings'
            --score_out '$scores'
            --summary_out '$summary'
            --figure '$figures'

        #if $group
            --group '$group'
        #end if
    ]]></command>
    <inputs>
        <param name="input" type="data" format="tabular" label="Wide Dataset" help="Input your tab-separated wide format dataset. If file is not tab separated see TIP below."/>
        <param name="design" type="data" format="tabular" label="Design File" help="Input your design file (tab-separated). Note you need a 'sampleID' column. If not tab separated see TIP below."/>
        <param name="uniqID" type="text" size="30" value="" label="Unique Feature ID" help="Name of the column in your wide dataset that has unique identifiers."/>
        <!-- Left empty by default; an empty value means no group option is passed to the script. -->
        <param name="group" type="text" size="30" label="Group/Treatment [Optional]" help="Name of the column in your design file that contains group classifications."/>
    </inputs>
    <outputs>
        <data format="tabular" name="loadings" label="${tool.name} on ${on_string}: loadings"/>
        <data format="tabular" name="scores" label="${tool.name} on ${on_string}: scores"/>
        <data format="tabular" name="summary" label="${tool.name} on ${on_string}: summary"/>
        <data format="pdf" name="figures" label="${tool.name} on ${on_string}: scatter plots"/>
    </outputs>
    <tests>
        <test>
            <param name="input" value="ST000006_data.tsv"/>
            <param name="design" value="ST000006_design.tsv"/>
            <param name="uniqID" value="Retention_Index" />
            <param name="group" value="White_wine_type_and_source" />
            <output name="loadings" file="ST000006_principal_component_analysis_load_out.tsv" />
            <output name="scores" file="ST000006_principal_component_analysis_score_out.tsv" />
            <output name="summary" file="ST000006_principal_component_analysis_summary_out.tsv" />
            <!-- The PDF is compared by size only, within a 10000-byte tolerance, rather than byte-for-byte. -->
            <output name="figures" file="ST000006_principal_component_analysis_figure.pdf" compare="sim_size" delta="10000" />
        </test>
    </tests>
    <help><![CDATA[

@TIP_AND_WARNING@

**Tool Description**

The tool performs principal component analysis (PCA) of the data.
Visual summaries are provided in the form of 2D and 3D scatter plots for the first three principal components.
Samples in the scatter plots are colored based on the group classification.

--------------------------------------------------------------------------------

**Note**

- This tool currently treats all variables as continuous numeric variables. Running the tool on categorical variables might result in incorrect results.
- Rows containing non-numeric (or missing) data in any of the chosen columns will be skipped from the analysis.

--------------------------------------------------------------------------------

**Input**

- Two input datasets are required.

@WIDE@

**NOTE:** The sample IDs must match the sample IDs in the Design File (below).
Extra columns will automatically be ignored.


@METADATA@

@UNIQID@

@GROUP_OPTIONAL@

--------------------------------------------------------------------------------

**Output**

Four different outputs are produced by the Principal Component Analysis tool:

(1) a TSV file containing eigenvectors/variable loadings
(2) a TSV file containing scores of input data on principal components
(3) a TSV file with the summary for each component
(4) and a PDF file of scatter plots of the first three principal components

There are a total of four scatterplots: three pairwise plots for the first three components and a single 3D plot of the first three components.

    ]]></help>
    <expand macro="citations"/>
</tool>