comparison partial_least_squares.xml @ 1:2e7d47c0b027 draft

"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
author malex
date Mon, 08 Mar 2021 22:04:06 +0000
parents
children caba07f41453
comparison
equal deleted inserted replaced
0:b54326490b4d 1:2e7d47c0b027
1 <tool id="secimtools_partial_least_squares" name="Partial Least Squares Discriminant Analysis (PLS-DA)" version="@WRAPPER_VERSION@">
2 <description></description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <command detect_errors="exit_code"><![CDATA[
8 partial_least_squares.py
9 --input '$input'
10 --design '$design'
11 --ID '$uniqID'
12 --group '$group'
13 --toCompare '$toCompare'
14 --cross_validation '$cross_validation'
15 --nComp '$nComp'
16 --outScores '$outScores'
17 --outWeights '$outWeights'
18 --outClassification '$outClassification'
19 --outClassificationAccuracy '$outClassificationAccuracy'
20 --figure '$figures'
21 ]]></command> <!-- Every substituted value is single-quoted so that user-typed column/group names containing spaces reach the script as a single argument and are not re-interpreted by the shell. -->
22 <inputs> <!-- Form fields; each name below is referenced as $name in the command template. -->
23 <param name="input" type="data" format="tabular" label="Wide Dataset" help="Input your tab-separated wide format dataset. If file is not tab separated see TIP below."/>
24 <param name="design" type="data" format="tabular" label="Design File" help="Input your design file (tab-separated). Note you need a 'sampleID' column. If not tab separated see TIP below."/>
25 <param name="uniqID" type="text" size="30" value="" label="Unique Feature ID" help="Name of the column in your wide dataset that has unique identifiers."/>
26 <param name="group" type="text" size="30" label="Group/Treatment" help="Name of the column in your design file that contains group classifications."/>
27 <param name="toCompare" type="text" size="30" label="Names of the Groups to Compare" help="Names of the two groups to compare. The user should ensure that group names do not contain commas. The separator for the two groups should only include commas (no spaces)."/>
28 <param name="cross_validation" type="select" size="30" display="radio" label="Cross-Validation Options">
29 <option value="none">None</option>
30 <option value="single">Single</option>
31 <option value="double" selected="true">Double</option> <!-- Default choice: select params take their default from selected="true" on an option, not from a value attribute on the param. -->
32 </param>
33 <param name="nComp" type="text" size="30" value="2" label="Number of Components" help="Number of components for the analysis to use (default = 2). This field is used only when the cross validation field is set to none."/>
34 </inputs>
35 <outputs> <!-- One dataset per output argument of the command line: four tabular tables and one PDF of plots. -->
36 <data name="outScores" format="tabular" label="${tool.name} on ${on_string}: Scores"/>
37 <data name="outWeights" format="tabular" label="${tool.name} on ${on_string}: Weights"/>
38 <data name="outClassification" format="tabular" label="${tool.name} on ${on_string}: Classification of Samples"/>
39 <data name="outClassificationAccuracy" format="tabular" label="${tool.name} on ${on_string}: Classification Accuracy of Samples"/>
40 <data name="figures" format="pdf" label="${tool.name} on ${on_string}: Scatter Plots"/>
41 </outputs>
42 <tests>
43 <test> <!-- Fixed two-component run (cross_validation=none) on the ST000006 data; tabular outputs are compared to reference files, the PDF by size only (sim_size, delta 10000). -->
44 <param name="input" value="ST000006_data.tsv"/>
45 <param name="design" value="ST000006_design_group_name_underscore.tsv"/>
46 <param name="uniqID" value="Retention_Index"/>
47 <param name="group" value="White_wine_type_and_source"/>
48 <param name="toCompare" value="Chardonnay_ Napa_ CA 2003,Riesling_ CA 2004"/>
49 <param name="cross_validation" value="none"/>
50 <param name="nComp" value="2"/>
51 <output name="outScores" file="ST000006_partial_least_squares_none_scores.tsv"/>
52 <output name="outWeights" file="ST000006_partial_least_squares_none_weights.tsv"/>
53 <output name="outClassification" file="ST000006_partial_least_squares_none_classification.tsv"/>
54 <output name="outClassificationAccuracy" file="ST000006_partial_least_squares_none_classification_accuracy.tsv"/>
55 <output name="figures" file="ST000006_partial_least_squares_none_figure.pdf" compare="sim_size" delta="10000"/>
56 </test>
57 </tests>
58 <help><![CDATA[
59
60 @TIP_AND_WARNING@
61
62 **Tool Description**
63
64 The tool performs partial least squares discriminant analysis (PLS-DA) for two treatment groups selected by the user.
65
66 **NOTE: A minimum of 100 samples is required by the tool for single or double cross validation**
67
68 The subspace dimension defines the number of components that will be used to describe the variability within the data.
69 The user can specify a subspace dimension ranging from two up to the number of samples.
70 The user has the option to specify the dimension of the subspace directly (default = 2) or to perform single or double cross-validation to determine the dimension of the subspace.
71
72 For single and double cross-validation: the data set is split differently when the model fit is performed.
73
74 For double cross-validation: the data set is split into pieces and the model fit is performed on one piece using cross-validation and evaluated on the other pieces.
75
76 For single cross-validation: the same data are used to fit the model and to evaluate the model using three-fold cross validation.
77
78 More details can be found in:
79
80 Geladi, Paul, and Bruce R. Kowalski. "Partial least-squares regression: a tutorial." Analytica chimica acta 185 (1986): 1-17.
81
82
83 --------------------------------------------------------------------------------
84
85 **Note**
86
87 - This tool currently treats all variables as continuous numeric
88 variables. Running the tool on categorical variables may result in
89 incorrect results.
90 - Rows containing non-numeric (or missing) data in any
91 of the chosen columns will be skipped from the analysis.
92
93 --------------------------------------------------------------------------------
94
95 **Input**
96
97 - Two input datasets are required.
98
99 @WIDE@
100
101 **NOTE:** The sample IDs must match the sample IDs in the Design File (below).
102 Extra columns will automatically be ignored.
103
104
105 @METADATA@
106
107 @UNIQID@
108
109 @GROUP@
110
111
112 **Names of the Groups to Compare**
113
114 - Comma separated names of the two groups in your Group/Treatment column that you want to compare. The user should ensure that group names do not contain commas. The separator for the two groups should only include commas (no spaces).
115
116 **Cross-Validation Options**
117
118 - The choice of cross-validation options available for the user. None corresponds to no cross-validation when the user specifies the number of components manually.
119
120 **Number of Components**
121
122 - The parameter is used only when the "None" cross-validation option is selected. If the field is left blank, the number of components is set to the default value (2).

123 --------------------------------------------------------------------------------
124
125 **Output**
126
127
128 Five different files are generated:
129
130 (1) a TSV file containing the scores produced by the model for each sample
131
132 (2) a TSV file containing the weights produced by the model for each feature.
133
134 (3) a TSV file containing the classification produced by the model for each sample.
135
136 (4) a TSV file containing the algorithm classification accuracy (in percent).
137
138 (5) a PDF file containing the 2D plots for all pairwise comparisons of components between the two treatment groups.
139
140 **NOTE:** Regardless of how many components are selected for the algorithm, pairwise 2D plots are produced for the pairs of components.
141 Increasing the number of components will increase the number of plots produced.
142
143 ]]></help>
144 <expand macro="citations"/>
145 </tool>