Mercurial > repos > malex > secimtools
annotate data_normalization_and_rescaling.xml @ 1:2e7d47c0b027 draft
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
author | malex |
---|---|
date | Mon, 08 Mar 2021 22:04:06 +0000 |
parents | |
children |
rev | line source |
---|---|
1
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
1 <tool id="secimtools_data_normalization_and_rescaling" name="Normalization and Re-Scaling" version="@WRAPPER_VERSION@"> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
2 <description>of data.</description> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
3 <macros> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
4 <import>macros.xml</import> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
5 </macros> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
6 <expand macro="requirements" /> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
7 <!-- Every substitution is single-quoted so that values containing spaces (e.g. a uniqID column name) reach the script as one argument. --> <command><![CDATA[ |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
8 data_normalization_and_rescaling.py |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
9 --input '$input' |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
10 --design '$design' |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
11 --uniqID '$uniqID' |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
12 --method '$method' |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
13 --out '$out' |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
14 ]]></command> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
15 <inputs> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
16 <param name="input" type="data" format="tabular" label="Wide Dataset" help="Input tab-separated wide format dataset. If not tab separated see TIP below."/> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
17 <param name="design" type="data" format="tabular" label="Design File" help="Input your design file (tab-separated). Note you need a 'sampleID' column. If not tab separated see TIP below."/> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
18 <param name="uniqID" type="text" size="30" value="" label="Unique Feature ID" help="Name of the column in your Wide Dataset that has unique identifiers."/> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
19 <param name="method" type="select" display="radio" label="Normalization Method" help="Method to be used for normalization and re-scaling of the data."> <!-- Single-choice list: "mean" is the only pre-selected option and therefore the default. --> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
20 <option value="mean" selected="true">Mean (samples)</option> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
21 <option value="sum">Sum (samples)</option> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
22 <option value="median">Median (samples)</option> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
23 <option value="centering">Centering (features)</option> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
24 <option value="auto">Autoscaling (features)</option> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
25 <option value="pareto">Pareto (features)</option> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
26 <option value="range">Range (features)</option> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
27 <option value="level">Level (features)</option> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
28 <option value="vast">VAST (features)</option> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
29 </param> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
30 </inputs> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
31 <outputs> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
32 <data format="tabular" name="out" label="${tool.name} on ${on_string}: Normalized data"/> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
33 </outputs> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
34 <tests> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
35 <test> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
36 <param name="input" value="ST000006_data.tsv"/> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
37 <param name="design" value="ST000006_design.tsv"/> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
38 <param name="uniqID" value="Retention_Index" /> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
39 <param name="method" value="mean" /> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
40 <output name="out" file="ST000006_data_normalization_and_rescaling_mean_output.tsv" /> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
41 </test> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
42 <test> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
43 <param name="input" value="ST000006_data.tsv"/> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
44 <param name="design" value="ST000006_design.tsv"/> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
45 <param name="uniqID" value="Retention_Index" /> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
46 <param name="method" value="vast" /> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
47 <output name="out" file="ST000006_data_normalization_and_rescaling_vast_output.tsv" /> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
48 </test> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
49 </tests> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
50 <help><![CDATA[ |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
51 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
52 @TIP_AND_WARNING@ |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
53 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
54 **Tool Description** |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
55 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
56 The first three normalization methods (Mean, Sum and Median) perform re-scaling of the data by sample. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
57 Each individual sample (column) in the wide dataset is re-scaled by dividing all feature values within that column by the mean, median or sum of those feature values. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
58 Each sample (column) is re-scaled independently from other samples (columns). |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
59 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
60 The last six normalization methods (Centering, Pareto, Autoscaling, Range, Level, and Variable Stability (VAST)) perform scaling of the data by features. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
61 Each feature (row) is re-scaled independently from other features. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
62 Each individual feature (row) in the wide dataset is centered by subtraction of the mean of that feature and is re-scaled by dividing all the feature values within that row by the scaling factor. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
63 The scaling factor is computed from the feature values in the current row and depends on the selected method. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
64 Centering does not have a scaling factor and does not perform division, Autoscaling uses standard deviation, Pareto scaling uses the square root of the standard deviation, Range uses the difference between the max and min values, and Level uses the mean. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
65 VAST scaling is performed in two steps. The first step is Autoscaling, followed by division of the resulting feature values in each row by the coefficient of variation for that feature. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
66 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
67 More information on the scaling methods is available in the literature: |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
68 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
69 Keun, Hector C., Timothy MD Ebbels, Henrik Antti, Mary E. Bollard, Olaf Beckonert, Elaine Holmes, John C. Lindon, and Jeremy K. Nicholson. "Improved analysis of multivariate data by variable stability scaling: application to NMR-based metabolic profiling." Analytica chimica acta 490, no. 1 (2003): 265-276. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
70 van den Berg, Robert A., Huub CJ Hoefsloot, Johan A. Westerhuis, Age K. Smilde, and Mariët J. van der Werf. "Centering, scaling, and transformations: improving the biological information content of metabolomics data." BMC genomics 7, no. 1 (2006): 142. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
71 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
72 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
73 ------------------------------------------------------------------------------------------ |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
74 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
75 **Input Files** |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
76 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
77 - Two input datasets are required. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
78 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
79 @WIDE@ |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
80 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
81 **NOTE:** The sample IDs must match the sample IDs in the Design File |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
82 (below). Extra columns will automatically be ignored. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
83 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
84 @METADATA@ |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
85 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
86 @UNIQID@ |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
87 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
88 **Normalization Method** |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
89 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
90 - Method to be used for normalization and re-scaling of the data. The text in parentheses indicates whether the method will be applied to samples or features. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
91 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
92 -------------------------------------------------------------------------------- |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
93 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
94 **Output** |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
95 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
96 TSV file containing the same column names as in the original Wide Dataset where the values in each cell correspond to the values after normalization/re-scaling. |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
97 |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
98 ]]></help> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
99 <expand macro="citations"/> |
2e7d47c0b027
"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
malex
parents:
diff
changeset
|
100 </tool> |