Mercurial > repos > artbio > gsc_high_dimensions_visualisation
comparison high_dim_visu.xml @ 8:fe6f76030168 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/gsc_high_dimension_visualization commit a3dc683410fc240f428c8fbee3c63aa9965fbf38
author | artbio |
---|---|
date | Wed, 29 Nov 2023 17:28:18 +0000 |
parents | 18a1dc4aec4a |
children | 58aa18e1fe14 |
comparison
equal
deleted
inserted
replaced
7:18a1dc4aec4a | 8:fe6f76030168 |
---|---|
1 <tool id="high_dimensions_visualisation" name="Generate PCA, tSNE and HCPC" version="0.9.6"> | 1 <tool id="high_dimensions_visualisation" name="Generate PCA, tSNE and HCPC" version="4.3+galaxy0" profile="20.01"> |
2 <description>from highly dimensional expression data</description> | 2 <description>from highly dimensional expression data</description> |
3 <requirements> | 3 <requirements> |
4 <requirement type="package" version="1.6.2=r35h6115d3f_0">r-optparse</requirement> | 4 <requirement type="package" version="1.7.3=r43hc72bb7e_2">r-optparse</requirement> |
5 <requirement type="package" version="1.42=r35h6115d3f_0">r-factominer</requirement> | 5 <requirement type="package" version="2.9=r43h57805ef_0">r-factominer</requirement> |
6 <requirement type="package" version="1.0.5">r-factoextra</requirement> | 6 <requirement type="package" version="1.0.7=r43hc72bb7e_3">r-factoextra</requirement> |
7 <requirement type="package" version="0.15=r351he1b5a44_0">r-rtsne</requirement> | 7 <requirement type="package" version="0.16=r43h7ce84a7_2">r-rtsne</requirement> |
8 <requirement type="package" version="0.4.7=r351h6115d3f_0">r-ggfortify</requirement> | 8 <requirement type="package" version="0.4.16=r43hc72bb7e_1">r-ggfortify</requirement> |
9 <requirement type="package" version="1.1.9=r351h0357c0b_0">r-clusterr</requirement> | 9 <requirement type="package" version="1.3.1=r43h08d816e_1">r-clusterr</requirement> |
10 <requirement type="package" version="1.2.5=r35h6115d3f_0">r-polychrome</requirement> | 10 <requirement type="package" version="1.5.1=r43hc72bb7e_2">r-polychrome</requirement> |
11 </requirements> | 11 </requirements> |
12 <stdio> | 12 <stdio> |
13 <exit_code range="1:" level="fatal" description="Tool exception" /> | 13 <exit_code range="1:" level="fatal" description="Tool exception" /> |
14 </stdio> | 14 </stdio> |
15 <command detect_errors="exit_code"><![CDATA[ | 15 <command detect_errors="exit_code"><![CDATA[ |
16 Rscript $__tool_directory__/high_dim_visu.R | 16 Rscript $__tool_directory__/high_dim_visu.R |
17 --data '$input' | 17 --data '$input' |
18 --sep '$input_sep' | |
19 --colnames '$input_header' | |
20 #if $factor_condition.factor_choice == 'Yes' | 18 #if $factor_condition.factor_choice == 'Yes' |
21 --factor '$factor_condition.factor' | 19 --factor '$factor_condition.factor' |
22 #end if | 20 #end if |
23 #if $labels == "yes": | 21 #if $labels == "yes": |
24 --labels 'TRUE' | 22 --labels 'TRUE' |
25 #else | 23 #else |
26 --labels 'FALSE' | 24 --labels 'FALSE' |
27 #end if | 25 #end if |
28 #if $coord == "yes": | |
29 --table_coordinates '$table_coordinates' | |
30 #end if | |
31 --visu_choice '$visualisation.visu_choice' | 26 --visu_choice '$visualisation.visu_choice' |
27 | |
32 #if $visualisation.visu_choice == "tSNE": | 28 #if $visualisation.visu_choice == "tSNE": |
33 --Rtsne_seed '$visualisation.Rtsne_seed' | 29 --Rtsne_seed '$visualisation.Rtsne_seed' |
34 --Rtsne_perplexity '$visualisation.Rtsne_perplexity' | 30 --Rtsne_perplexity '$visualisation.Rtsne_perplexity' |
35 --Rtsne_theta '$visualisation.Rtsne_theta' | 31 --Rtsne_theta '$visualisation.Rtsne_theta' |
36 --Rtsne_max_iter '$visualisation.Rtsne_max_iter' | 32 --Rtsne_max_iter '$visualisation.Rtsne_max_iter' |
52 --HCPC_itermax '$visualisation.HCPC_itermax' | 48 --HCPC_itermax '$visualisation.HCPC_itermax' |
53 --HCPC_min '$visualisation.HCPC_min' | 49 --HCPC_min '$visualisation.HCPC_min' |
54 --HCPC_max '$visualisation.HCPC_max' | 50 --HCPC_max '$visualisation.HCPC_max' |
55 --HCPC_clusterCA '$visualisation.HCPC_clusterCA' | 51 --HCPC_clusterCA '$visualisation.HCPC_clusterCA' |
56 --HCPC_kk '$visualisation.HCPC_kk' | 52 --HCPC_kk '$visualisation.HCPC_kk' |
57 --HCPC_cluster_description '$HCPC_cluster_description' | 53 --HCPC_contributions '$HCPC_contributions' |
58 #if $visualisation.res_clustering == "yes": | 54 --HCPC_cell_clust '$HCPC_cell_clust' |
59 --HCPC_clust '$HCPC_clust' | 55 #if $factor_condition.factor_choice == "Yes": |
60 #end if | 56 --HCPC_mutual_info '$HCPC_mutual_info' |
57 #end if | |
61 #end if | 58 #end if |
62 | 59 |
63 #if $visualisation.visu_choice == "PCA": | 60 #if $visualisation.visu_choice == "PCA": |
64 --PCA_npc '$visualisation.PCA_npc' | 61 --PCA_npc '$visualisation.PCA_npc' |
65 --PCA_x_axis '$visualisation.PCA_x_axis' | 62 --x_axis '$visualisation.x_axis' |
66 --PCA_y_axis '$visualisation.PCA_y_axis' | 63 --y_axis '$visualisation.y_axis' |
64 --item_size '$visualisation.item_size' | |
67 #end if | 65 #end if |
68 | |
69 #if $visualisation.visu_choice == "HCPC" and $factor_condition.factor_choice == "Yes": | |
70 --HCPC_mutual_info '$HCPC_mutual_info' | |
71 #end if | |
72 | |
73 --pdf_out '$pdf_out' | 66 --pdf_out '$pdf_out' |
74 | |
75 ]]></command> | 67 ]]></command> |
76 <inputs> | 68 <inputs> |
77 <param name="input" type="data" format="txt,tabular" label="expression data"/> | 69 <param name="input" type="data" format="txt,tabular" label="expression data"/> |
78 <param name="input_sep" type="select" label="Input column separator"> | |
79 <option value="tab" selected="true">Tabs</option> | |
80 <option value=",">Comma</option> | |
81 </param> | |
82 <param name="input_header" type="select" label="Consider first line of input file as header?"> | |
83 <option value="TRUE" selected="true">Yes</option> | |
84 <option value="FALSE">No</option> | |
85 </param> | |
86 <param name="labels" type="select" label="Add sample labels to scatter plot" > | 70 <param name="labels" type="select" label="Add sample labels to scatter plot" > |
87 <option value="no" selected="true">No Labels</option> | 71 <option value="no" selected="true">No Labels</option> |
88 <option value="yes" >Label points</option> | 72 <option value="yes" >Label points</option> |
89 </param> | 73 </param> |
90 <conditional name="factor_condition"> | 74 <conditional name="factor_condition"> |
92 <option value="Yes">Yes</option> | 76 <option value="Yes">Yes</option> |
93 <option value="No" selected="true">No</option> | 77 <option value="No" selected="true">No</option> |
94 </param> | 78 </param> |
95 <when value="Yes"> | 79 <when value="Yes"> |
96 <param name="factor" type="data" format="tabular" label="Factor to constrast data" | 80 <param name="factor" type="data" format="tabular" label="Factor to constrast data" |
97 help="A two-column data frame, first column contains data labels, second column contains the levels of a factor to contrast visualisation" /> | 81 help="A two-column data frame, with column headers, first column contains data labels, |
82 second column contains the levels of a factor to contrast visualisation" /> | |
98 </when> | 83 </when> |
99 <when value="No"> | 84 <when value="No"> |
100 </when> | 85 </when> |
101 </conditional> | 86 </conditional> |
102 <conditional name="visualisation"> | 87 <conditional name="visualisation"> |
104 <option value="PCA" selected="True">PCA</option> | 89 <option value="PCA" selected="True">PCA</option> |
105 <option value="HCPC">HCPC</option> | 90 <option value="HCPC">HCPC</option> |
106 <option value="tSNE">t-SNE</option> | 91 <option value="tSNE">t-SNE</option> |
107 </param> | 92 </param> |
108 <when value="tSNE"> | 93 <when value="tSNE"> |
109 <param name="Rtsne_seed" value="42" type="integer" label="Seed value for reproducibility of t-SNE" help="Set to 42 as default" /> | 94 <param name="Rtsne_seed" value="42" type="integer" label="Seed value for reproducibility of t-SNE" help="Set to 42 as default"/> |
110 <param name="Rtsne_dims" value="2" type="integer" label="dims (t-SNE)" help="Output dimensionality (should not be greater than 3)" /> | 95 <param name="Rtsne_dims" value="2" type="integer" label="dims (t-SNE)" help="Output dimensionality (should not be greater than 3)"/> |
111 <param name="Rtsne_pca" type="select" label="pca (t-SNE)" help="Whether an initial PCA step should be performed" > | 96 <param name="Rtsne_pca" type="select" label="pca (t-SNE)" help="Whether an initial PCA step should be performed" > |
112 <option value="TRUE" selected="true">Yes</option> | 97 <option value="TRUE" selected="true">Yes</option> |
113 <option value="FALSE">False</option> | 98 <option value="FALSE">False</option> |
114 </param> | 99 </param> |
115 <param name="Rtsne_initial_dims" value="50" type="integer" label="initial dims (t-SNE)" help="The number of dimensions that should be retained in the initial PCA step" /> | 100 <param name="Rtsne_initial_dims" value="50" type="integer" label="initial dims (t-SNE)" |
116 <param name="Rtsne_pca_center" type="select" label="Centering data" help="Should data be centered before pca is applied? " > | 101 help="The number of dimensions that should be retained in the initial PCA step"/> |
117 <option value="TRUE" selected="true">Yes</option> | 102 <param name="Rtsne_pca_center" type="select" label="Centering data" help="Should data be centered before pca is applied?"> |
118 <option value="FALSE">False</option> | 103 <option value="TRUE" selected="true">Yes</option> |
119 </param> | 104 <option value="FALSE">False</option> |
120 <param name="Rtsne_pca_scale" type="select" label="Scalling data" help="Should data be scaled before pca is applied? " > | 105 </param> |
121 <option value="TRUE">Yes</option> | 106 <param name="Rtsne_pca_scale" type="select" label="Scalling data" help="Should data be scaled before pca is applied?"> |
122 <option value="FALSE" selected="true">False</option> | 107 <option value="TRUE">Yes</option> |
123 </param> | 108 <option value="FALSE" selected="true">False</option> |
124 <param name="Rtsne_normalize" type="select" label="Normalisation of data" | 109 </param> |
125 help="Should variables (gene expressions) be normalized internally prior to distance calculations? " > | 110 <param name="Rtsne_normalize" type="select" label="Normalisation of data" |
126 <option value="TRUE" selected="true">Yes</option> | 111 help="Should variables (gene expressions) be normalized internally prior to distance calculations?"> |
127 <option value="FALSE">False</option> | 112 <option value="TRUE" selected="true">Yes</option> |
128 </param> | 113 <option value="FALSE">False</option> |
129 <param name="Rtsne_perplexity" value="10.0" type="float" label="perplexity (t-SNE)" help="should be less than ((nbr observations)-1)/3" /> | 114 </param> |
115 <param name="Rtsne_perplexity" value="10.0" type="float" label="perplexity (t-SNE)" help="should be less than ((nbr observations)-1)/3"/> | |
130 <param name="Rtsne_theta" value="1.0" type="float" label="theta (t-SNE)"/> | 116 <param name="Rtsne_theta" value="1.0" type="float" label="theta (t-SNE)"/> |
131 <param name="Rtsne_exaggeration_factor" value="12.0" type="float" label="Exageration factor" help="Exaggeration factor used to multiply the P matrix in the first part of the optimization" /> | 117 <param name="Rtsne_exaggeration_factor" value="12.0" type="float" label="Exageration factor" |
118 help="Exaggeration factor used to multiply the P matrix in the first part of the optimization"/> | |
132 <param name="Rtsne_max_iter" value="1000" type="integer" label="Number of iterations (default: 1000)" | 119 <param name="Rtsne_max_iter" value="1000" type="integer" label="Number of iterations (default: 1000)" |
133 help="The number of iterations that Rtsne executes to improve low dim representation (gradient descent optimization)" /> | 120 help="The number of iterations that Rtsne executes to improve low dim representation (gradient descent optimization)"/> |
134 </when> | 121 </when> |
135 <when value="HCPC"> | 122 <when value="HCPC"> |
136 <param name="HCPC_npc" value="5" type="integer" label="Number of principal components to keep" | 123 <param name="HCPC_npc" value="5" type="integer" label="Number of principal components to keep" |
137 help="The number of dimensions which are kept for HCPC analysis (default=5)" /> | 124 help="The number of dimensions which are kept for HCPC analysis (default=5)"/> |
138 <param name="HCPC_ncluster" value="-1" type="integer" label="Number of clusters in Hierar. Clustering" | 125 <param name="HCPC_ncluster" value="-1" type="integer" label="Number of clusters in Hierar. Clustering" |
139 help="nb.clust - an integer. If 0, the tree is cut at the level the user clicks on (not working in Galaxy). If -1, the tree is | 126 help="nb.clust - an integer. If 0, the tree is cut at the level the user clicks on (not working in Galaxy). If -1, the tree is |
140 automatically cut at the suggested level (see details). If a (positive) integer, the tree is cut with nb.cluters clusters." /> | 127 automatically cut at the suggested level (see details). If a (positive) integer, the tree is cut with nb.cluters clusters."/> |
141 <param name="HCPC_metric" type="select" label="Dissimilarity metric" help="Metric to be used for calculating dissimilarities between observations, can be 'euclidean' or 'manhattan' " > | 128 <param name="HCPC_metric" type="select" label="Dissimilarity metric" |
142 <option value="euclidean" selected="true">euclidean</option> | 129 help="Metric to be used for calculating dissimilarities between observations, can be 'euclidean' or 'manhattan'"> |
143 <option value="manhattan">manhattan</option> | 130 <option value="euclidean" selected="true">euclidean</option> |
144 </param> | 131 <option value="manhattan">manhattan</option> |
145 <param name="HCPC_method" type="select" label="Clustering method" help="character string defining the clustering method. | 132 </param> |
146 The four methods implemented are 'average' ([unweighted pair-]group [arithMetic] average method, aka ‘UPGMA’), | 133 <param name="HCPC_method" type="select" label="Clustering method" |
147 'single' (single linkage), 'complete' (complete linkage), and 'ward' (Ward's method). | 134 help="character string defining the clustering method. The four methods implemented are 'average' |
148 The default with this Galaxy tool is is 'ward'." > | 135 ([unweighted pair-]group [arithMetic] average method, aka ‘UPGMA’), 'single' (single linkage), 'complete' |
149 <option value="ward" selected="true">ward</option> | 136 (complete linkage), and 'ward' (Ward's method). The default with this Galaxy tool is is 'ward'."> |
150 <option value="average">average</option> | 137 <option value="ward" selected="true">ward</option> |
151 <option value="single">single</option> | 138 <option value="average">average</option> |
152 <option value="complete">complete</option> | 139 <option value="single">single</option> |
153 </param> | 140 <option value="complete">complete</option> |
154 <param name="HCPC_consol" type="select" label="k-means consolidation" help="a boolean. If TRUE, a k-means consolidation is performed | 141 </param> |
155 (consolidation cannot be performed if kk is used and equals a number)." > | 142 <param name="HCPC_consol" type="select" label="k-means consolidation" |
156 <option value="TRUE" selected="true">Yes</option> | 143 help="A boolean. If TRUE, a k-means consolidation is performed (consolidation cannot be performed |
157 <option value="FALSE">False</option> | 144 if kk is used and equals a number)."> |
158 </param> | 145 <option value="TRUE" selected="true">Yes</option> |
159 <param name="HCPC_itermax" value="10" type="integer" label="Maximum number of iterations for consolidation" | 146 <option value="FALSE">False</option> |
160 help="An integer. The maximum number of iterations for the consolidation. (default=10)" /> | 147 </param> |
148 <param name="HCPC_itermax" value="10" type="integer" label="Maximum number of iterations for consolidation" | |
149 help="An integer. The maximum number of iterations for the consolidation. (default=10)"/> | |
161 <param name="HCPC_min" value="3" type="integer" label="min number of clusters" | 150 <param name="HCPC_min" value="3" type="integer" label="min number of clusters" |
162 help="an integer. The least possible number of clusters suggested. (default=3)" /> | 151 help="an integer. The least possible number of clusters suggested. (default=3)"/> |
163 <param name="HCPC_max" value="-1" type="text" label="max number of clusters" | 152 <param name="HCPC_max" value="-1" type="text" label="max number of clusters" |
164 help="The higher possible number of clusters suggested, by default the minimum between 10 and the number of individuals divided by 2. (default=-1)" /> | 153 help="The higher possible number of clusters suggested, by default the minimum between 10 |
165 <param name="HCPC_clusterCA" type="select" label="cluster.CA, Clustering against rows or columns" | 154 and the number of individuals divided by 2. (default = NULL)"/> |
166 help="A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results.default(rows)"> | 155 <param name="HCPC_clusterCA" type="select" label="cluster.CA, Clustering against rows or columns" |
167 <option value="rows" selected="true">Rows</option> | 156 help="A string equals to 'rows' or 'columns' for the clustering of Correspondence Analysis results.default(rows)"> |
168 <option value="cols">Columns</option> | 157 <option value="rows" selected="true">Rows</option> |
169 </param> | 158 <option value="cols">Columns</option> |
170 <param name="HCPC_kk" value="Inf" type="text" label="kk, Number of clusters used in a Kmeans preprocessing " | 159 </param> |
160 <param name="HCPC_kk" value="Inf" type="text" label="kk, Number of clusters used in a Kmeans preprocessing" | |
171 help="An integer corresponding to the number of clusters used in a Kmeans preprocessing before the | 161 help="An integer corresponding to the number of clusters used in a Kmeans preprocessing before the |
172 hierarchical clustering; the top of the hierarchical tree is then constructed from this partition. | 162 hierarchical clustering; the top of the hierarchical tree is then constructed from this partition. |
173 This is very useful if the number of individuals is high. Note that consolidation cannot be performed | 163 This is very useful if the number of individuals is high. Note that consolidation cannot be performed |
174 if kk is different from Inf and some graphics are not drawn. Inf is used by default and no preprocessing | 164 if kk is different from Inf and some graphics are not drawn. Inf is used by default and no preprocessing |
175 is done, all the graphical outputs are then given." /> | 165 is done, all the graphical outputs are then given."/> |
176 <param label="Return HCPC clustering table" name="res_clustering" type="select"> | |
177 <option value="no" selected="True">No</option> | |
178 <option value="yes">Yes</option> | |
179 </param> | |
180 </when> | 166 </when> |
181 <when value="PCA"> | 167 <when value="PCA"> |
182 <param name="PCA_npc" value="5" type="integer" label="Number of principal components to keep" help="The number of dimensions which are kept for PCA analysis (default=5)" /> | 168 <param name="PCA_npc" value="5" type="integer" label="Number of principal components to keep" |
183 <param name="PCA_x_axis" value="1" type="integer" label="First principal component to plot" help="X axis for PCA plot (default=1)" /> | 169 help="The number of dimensions which are kept for PCA analysis (default=5)"/> |
184 <param name="PCA_y_axis" value="2" type="integer" label="Second principal component to plot" help="Y axis for PCA plot (default=2)" /> | 170 <param name="item_size" value="1" type="float" label="Adjust size of points/labels in PCA graph" help="size of points/labels (default=1)"/> |
171 <param name="x_axis" value="1" type="integer" label="Principal component to plot to x axis" help="PC to plot as x (default=1)"/> | |
172 <param name="y_axis" value="2" type="integer" label="Principal component to plot to y axis" help="PCA plot as y (default=2)"/> | |
185 </when> | 173 </when> |
186 </conditional> | 174 </conditional> |
187 <param label="Return scatter plot table coordinates" name="coord" type="select"> | |
188 <option value="no" selected="True">No</option> | |
189 <option value="yes">Yes</option> | |
190 </param> | |
191 | |
192 </inputs> | 175 </inputs> |
193 <outputs> | 176 <outputs> |
194 <data name="pdf_out" format="pdf" label="${visualisation.visu_choice} of ${on_string}" /> | 177 <data name="pdf_out" format="pdf" label="${visualisation.visu_choice} of ${on_string}"/> |
195 <data name="table_coordinates" format="tabular" label="Scatter plot coordinates from ${visualisation.visu_choice} of ${on_string}" > | 178 <data name="HCPC_cell_clust" format="tabular" label="Clustering table from ${visualisation.visu_choice} of ${on_string}"> |
196 <filter>coord == 'yes'</filter> | 179 <filter>visualisation['visu_choice'] == 'HCPC'</filter> |
197 </data> | 180 </data> |
198 <data name="HCPC_mutual_info" format="txt" label="External validation of clustering from ${visualisation.visu_choice} of ${on_string}" > | 181 <data name="HCPC_contributions" format="tabular" label="Cluster information from ${visualisation.visu_choice}"> |
182 <filter>visualisation['visu_choice'] == 'HCPC'</filter> | |
183 </data> | |
184 <data name="HCPC_mutual_info" format="txt" label="External validation of clustering from ${visualisation.visu_choice} of ${on_string}"> | |
199 <filter>visualisation['visu_choice'] == 'HCPC' and factor_condition['factor_choice'] == 'Yes'</filter> | 185 <filter>visualisation['visu_choice'] == 'HCPC' and factor_condition['factor_choice'] == 'Yes'</filter> |
200 </data> | |
201 <data name="HCPC_clust" format="tabular" label="Clustering table from ${visualisation.visu_choice} of ${on_string}" > | |
202 <filter>visualisation['visu_choice'] == 'HCPC' and visualisation['res_clustering'] == 'yes'</filter> | |
203 </data> | |
204 <data name="HCPC_cluster_description" format="tabular" label="Cluster information from ${visualisation.visu_choice}" > | |
205 <filter>visualisation['visu_choice'] == 'HCPC' </filter> | |
206 </data> | 186 </data> |
207 </outputs> | 187 </outputs> |
208 <tests> | 188 <tests> |
209 <!-- test first (for developpers) --> | 189 <!-- test tSNE --> |
210 <!-- test PCA --> | 190 <test expect_num_outputs="1"> |
211 <test> | |
212 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
213 <param name="labels" value="yes" /> | |
214 <param name="visu_choice" value="PCA" /> | |
215 <param name="factor_choice" value="No" /> | |
216 <output name="pdf_out" file="pca.labels.pdf" ftype="pdf"/> | |
217 </test> | |
218 <test> | |
219 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
220 <param name="labels" value="no" /> | |
221 <param name="visu_choice" value="PCA" /> | |
222 <param name="factor_choice" value="No" /> | |
223 <output name="pdf_out" file="pca.nolabels.pdf" ftype="pdf"/> | |
224 </test> | |
225 <!-- test PCA PC2 vs PC3 --> | |
226 <test> | |
227 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
228 <param name="labels" value="no" /> | |
229 <param name="visu_choice" value="PCA" /> | |
230 <param name="factor_choice" value="No" /> | |
231 <param name="PCA_x_axis" value="2" /> | |
232 <param name="PCA_y_axis" value="3" /> | |
233 <output name="pdf_out" file="pca.2vs3.pdf" ftype="pdf"/> | |
234 </test> | |
235 | |
236 <!-- test Coordinates tables on PCA --> | |
237 <test> | |
238 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
239 <param name="labels" value="no" /> | |
240 <param name="visu_choice" value="PCA" /> | |
241 <param name="coord" value="yes" /> | |
242 <param name="factor_choice" value="No" /> | |
243 <output name="pdf_out" file="pca.nolabels.pdf" ftype="pdf"/> | |
244 <output name="table_coordinates" file="pca.coord.tab" ftype="tabular"/> | |
245 </test> | |
246 <!-- test factor contrasting on PCA --> | |
247 <test> | |
248 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
249 <param name="labels" value="no" /> | |
250 <param name="visu_choice" value="PCA" /> | |
251 <param name="factor_choice" value="Yes" /> | |
252 <param name="factor" value="factor.tsv" ftype="txt"/> | |
253 <output name="pdf_out" file="pca.nolabels.factors.pdf" ftype="pdf"/> | |
254 </test> | |
255 <!-- test two-level factor contrasting on PCA --> | |
256 <test> | |
257 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
258 <param name="labels" value="no" /> | |
259 <param name="visu_choice" value="PCA" /> | |
260 <param name="factor_choice" value="Yes" /> | |
261 <param name="factor" value="2-lev_factor.tsv" ftype="txt"/> | |
262 <output name="pdf_out" file="pca.nolabels.2-lev-factor.pdf" compare="sim_size" ftype="pdf"/> | |
263 </test> | |
264 <!-- test numerical factor contrasting on PCA --> | |
265 <test> | |
266 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
267 <param name="labels" value="no" /> | |
268 <param name="visu_choice" value="PCA" /> | |
269 <param name="factor_choice" value="Yes" /> | |
270 <param name="factor" value="numeric_factor.tsv" ftype="txt"/> | |
271 <output name="pdf_out" file="pca.nolabels.numerical-factor.pdf" compare="sim_size" ftype="pdf"/> | |
272 </test> | |
273 <test> | |
274 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
275 <param name="labels" value="no" /> | |
276 <param name="visu_choice" value="PCA" /> | |
277 <param name="factor_choice" value="Yes" /> | |
278 <param name="factor" value="shuffled_factor.tsv" ftype="txt"/> | |
279 <output name="pdf_out" file="pca.nolabels.factors.pdf" compare="sim_size" ftype="pdf"/> | |
280 </test> | |
281 <!-- test HCPC --> | |
282 <test> | |
283 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
284 <param name="labels" value="yes" /> | |
285 <param name="visu_choice" value="HCPC" /> | |
286 <param name="HCPC_npc" value="5"/> | |
287 <param name="HCPC_ncluster" value="-1"/> | |
288 <output name="pdf_out" file="hcpc.labels.pdf" compare="sim_size" ftype="pdf"/> | |
289 <output name="HCPC_cluster_description" file="hcpc.cluster_description.1.tab" ftype="tabular"/> | |
290 </test> | |
291 <!-- test factor contrasting on HCPC --> | |
292 <test> | |
293 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
294 <param name="labels" value="no" /> | |
295 <param name="visu_choice" value="HCPC" /> | |
296 <param name="HCPC_npc" value="5"/> | |
297 <param name="HCPC_ncluster" value="-1"/> | |
298 <param name="res_clustering" value="yes"/> | |
299 <param name="factor_choice" value="Yes" /> | |
300 <param name="factor" value="factor.tsv" ftype="txt"/> | |
301 <output name="pdf_out" file="hcpc.nolabels.factor.pdf" compare="sim_size" ftype="pdf"/> | |
302 <output name="HCPC_mutual_info" file="hcpc.factor.extval.txt" ftype="txt"/> | |
303 <output name="HCPC_clust" file="hcpc.clusters.tab" ftype="tabular"/> | |
304 <output name="HCPC_cluster_description" file="hcpc.cluster_description.1.tab" ftype="tabular"/> | |
305 </test> | |
306 <test> | |
307 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
308 <param name="labels" value="no" /> | |
309 <param name="HCPC_npc" value="5"/> | |
310 <param name="HCPC_ncluster" value="-1"/> | |
311 <param name="visu_choice" value="HCPC" /> | |
312 <output name="pdf_out" file="hcpc.nolabels.pdf" compare="sim_size" ftype="pdf"/> | |
313 <output name="HCPC_cluster_description" file="hcpc.cluster_description.1.tab" ftype="tabular"/> | |
314 </test> | |
315 <test> | |
316 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
317 <param name="labels" value="yes" /> | |
318 <param name="visu_choice" value="HCPC" /> | |
319 <param name="coord" value="yes" /> | |
320 <param name="HCPC_method" value="average"/> | |
321 <param name="HCPC_metric" value="manhattan"/> | |
322 <param name="HCPC_npc" value="4" /> | |
323 <output name="pdf_out" file="hcpc-2.labels.pdf" ftype="pdf"/> | |
324 <output name="table_coordinates" file="hcpc-2.coord.tab" ftype="tabular"/> | |
325 <output name="HCPC_cluster_description" file="hcpc.cluster_description.4.tab" ftype="tabular"/> | |
326 </test> | |
327 <test> | |
328 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
329 <param name="labels" value="yes" /> | |
330 <param name="visu_choice" value="HCPC" /> | |
331 <param name="coord" value="yes" /> | |
332 <param name="HCPC_method" value="single"/> | |
333 <param name="HCPC_metric" value="euclidean"/> | |
334 <param name="HCPC_npc" value="4" /> | |
335 <param name="HCPC_clusterCA" value="cols" /> | |
336 <output name="pdf_out" file="hcpc-3.labels.pdf" compare="sim_size" ftype="pdf"/> | |
337 <output name="table_coordinates" file="hcpc-3.coord.tab" ftype="tabular"/> | |
338 <output name="HCPC_cluster_description" file="hcpc.cluster_description.5.tab" ftype="tabular"/> | |
339 </test> | |
340 <!-- test t-SNE --> | |
341 <test> | |
342 <param name="input" value="cpm_input.tsv" ftype="txt"/> | 191 <param name="input" value="cpm_input.tsv" ftype="txt"/> |
343 <param name="labels" value="yes" /> | 192 <param name="labels" value="yes" /> |
344 <param name="visu_choice" value="tSNE" /> | 193 <param name="visu_choice" value="tSNE" /> |
345 <param name="Rtsne_seed" value="49"/> | 194 <param name="Rtsne_seed" value="49"/> |
346 <param name="Rtsne_perplexity" value="10"/> | 195 <param name="Rtsne_perplexity" value="10"/> |
347 <param name="Rtsne_theta" value="1" /> | 196 <param name="Rtsne_theta" value="1" /> |
348 <output name="pdf_out" file="tsne.labels.pdf" ftype="pdf" compare="sim_size" delta="500"/> | 197 <output name="pdf_out" file="tsne.1.pdf" ftype="pdf" compare="sim_size" delta="500"/> |
349 </test> | 198 </test> |
350 <test> | 199 <test expect_num_outputs="1"> |
351 <param name="input" value="cpm_input.tsv" ftype="txt"/> | 200 <param name="input" value="cpm_input.tsv" ftype="txt"/> |
352 <param name="labels" value="no" /> | 201 <param name="labels" value="no" /> |
353 <param name="visu_choice" value="tSNE" /> | 202 <param name="visu_choice" value="tSNE" /> |
203 <param name="factor_choice" value="Yes" /> | |
204 <param name="factor" value="numeric_factor.tsv" ftype="txt"/> | |
354 <param name="Rtsne_seed" value="49"/> | 205 <param name="Rtsne_seed" value="49"/> |
355 <param name="Rtsne_perplexity" value="10"/> | 206 <param name="Rtsne_perplexity" value="10"/> |
356 <param name="Rtsne_theta" value="1" /> | 207 <param name="Rtsne_theta" value="1" /> |
357 <output name="pdf_out" file="tsne.nolabels.pdf" ftype="pdf" compare="sim_size" delta="500"/> | 208 <output name="pdf_out" file="tsne.2.pdf" ftype="pdf" compare="sim_size" delta="500"/> |
358 </test> | 209 </test> |
359 <!-- test factor contrasting on t-SNE --> | 210 <test expect_num_outputs="1"> |
360 <test> | |
361 <param name="input" value="cpm_input.tsv" ftype="txt"/> | 211 <param name="input" value="cpm_input.tsv" ftype="txt"/> |
362 <param name="labels" value="yes" /> | 212 <param name="labels" value="yes" /> |
363 <param name="visu_choice" value="tSNE" /> | 213 <param name="visu_choice" value="tSNE" /> |
364 <param name="factor_choice" value="Yes" /> | 214 <param name="factor_choice" value="Yes" /> |
365 <param name="factor" value="shuffled_factor.tsv" ftype="txt"/> | 215 <param name="factor" value="shuffled_factor.tsv" ftype="txt"/> |
366 <param name="Rtsne_seed" value="49"/> | 216 <param name="Rtsne_seed" value="49"/> |
367 <param name="Rtsne_perplexity" value="10"/> | 217 <param name="Rtsne_perplexity" value="10"/> |
368 <param name="Rtsne_theta" value="1" /> | 218 <param name="Rtsne_theta" value="1" /> |
369 <output name="pdf_out" file="tsne.labels.factor.pdf" ftype="pdf" compare="sim_size" delta="500"/> | 219 <output name="pdf_out" file="tsne.3.pdf" ftype="pdf" compare="sim_size" delta="500"/> |
370 </test> | 220 </test> |
371 <test> | 221 <test expect_num_outputs="1"> |
372 <param name="input" value="cpm_input.tsv" ftype="txt"/> | 222 <param name="input" value="cpm_input.tsv" ftype="txt"/> |
373 <param name="labels" value="no" /> | 223 <param name="labels" value="no" /> |
374 <param name="visu_choice" value="tSNE" /> | 224 <param name="visu_choice" value="tSNE" /> |
375 <param name="Rtsne_seed" value="49" /> | 225 <param name="Rtsne_seed" value="49" /> |
376 <param name="coord" value="yes" /> | |
377 <param name="Rtsne_dims" value="3" /> | 226 <param name="Rtsne_dims" value="3" /> |
378 <param name="Rtsne_perplexity" value="10"/> | 227 <param name="Rtsne_perplexity" value="10"/> |
379 <param name="Rtsne_theta" value="1" /> | 228 <param name="Rtsne_theta" value="1" /> |
380 <param name="Rtsne_normalize" value="FALSE" /> | 229 <param name="Rtsne_normalize" value="FALSE" /> |
381 <output name="pdf_out" file="tsne-2.nolabels.pdf" ftype="pdf" compare="sim_size" delta="1000"/> | 230 <output name="pdf_out" file="tsne.4.pdf" ftype="pdf" compare="sim_size" delta="1000"/> |
382 <output name="table_coordinates" file="tsne-2.coord.tab" ftype="tabular" compare="sim_size" delta="1000"/> | 231 </test> |
232 <!-- test PCA --> | |
233 <test expect_num_outputs="1"> | |
234 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
235 <param name="labels" value="yes" /> | |
236 <param name="visu_choice" value="PCA" /> | |
237 <param name="factor_choice" value="No" /> | |
238 <param name="item_size" value="0.5" /> | |
239 <output name="pdf_out" file="pca.1.pdf" ftype="pdf"/> | |
240 </test> | |
241 <!-- test PCA PC2 vs PC3 --> | |
242 <test expect_num_outputs="1"> | |
243 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
244 <param name="labels" value="no" /> | |
245 <param name="visu_choice" value="PCA" /> | |
246 <param name="factor_choice" value="No" /> | |
247 <param name="x_axis" value="2" /> | |
248 <param name="y_axis" value="3" /> | |
249 <output name="pdf_out" file="pca.2.pdf" ftype="pdf"/> | |
250 </test> | |
251 <!-- test factor contrasting on PCA --> | |
252 <test expect_num_outputs="1"> | |
253 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
254 <param name="labels" value="no" /> | |
255 <param name="visu_choice" value="PCA" /> | |
256 <param name="factor_choice" value="Yes" /> | |
257 <param name="factor" value="factor.tsv" ftype="txt"/> | |
258 <output name="pdf_out" file="pca.3.pdf" ftype="pdf"/> | |
259 </test> | |
260 <!-- test numerical factor contrasting on PCA --> | |
261 <test expect_num_outputs="1"> | |
262 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
263 <param name="labels" value="no" /> | |
264 <param name="visu_choice" value="PCA" /> | |
265 <param name="factor_choice" value="Yes" /> | |
266 <param name="factor" value="numeric_factor.tsv" ftype="txt"/> | |
267 <output name="pdf_out" file="pca.4.pdf" compare="sim_size" ftype="pdf"/> | |
268 </test> | |
269 <test expect_num_outputs="1"> | |
270 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
271 <param name="labels" value="no" /> | |
272 <param name="visu_choice" value="PCA" /> | |
273 <param name="factor_choice" value="Yes" /> | |
274 <param name="factor" value="shuffled_factor.tsv" ftype="txt"/> | |
275 <output name="pdf_out" file="pca.5.pdf" compare="sim_size" ftype="pdf"/> | |
276 </test> | |
277 <!-- HCPC tests --> | |
278 <test expect_num_outputs="3"> | |
279 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
280 <param name="labels" value="yes" /> | |
281 <param name="visu_choice" value="HCPC" /> | |
282 <param name="HCPC_npc" value="5"/> | |
283 <param name="HCPC_ncluster" value="-1"/> | |
284 <output name="pdf_out" file="hcpc.1.pdf" compare="sim_size" ftype="pdf"/> | |
285 <output name="HCPC_cell_clust" file="hcpc.cell-cluster.1.tsv" ftype="tabular"/> | |
286 <output name="HCPC_contributions" file="hcpc.component-impact.1.tsv" ftype="tabular"/> | |
287 </test> | |
288 <test expect_num_outputs="4"> | |
289 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
290 <param name="labels" value="no" /> | |
291 <param name="visu_choice" value="HCPC" /> | |
292 <param name="HCPC_npc" value="5"/> | |
293 <param name="HCPC_ncluster" value="-1"/> | |
294 <param name="factor_choice" value="Yes" /> | |
295 <param name="factor" value="factor.tsv" ftype="txt"/> | |
296 <output name="pdf_out" file="hcpc.2.pdf" compare="sim_size" ftype="pdf"/> | |
297 <output name="HCPC_mutual_info" file="hcpc.factor.extval.txt" ftype="txt"/> | |
298 <output name="HCPC_cell_clust" file="hcpc.cell-cluster.2.tsv" ftype="tabular"/> | |
299 <output name="HCPC_contributions" file="hcpc.component-impact.2.tsv" ftype="tabular"/> | |
300 </test> | |
301 <test expect_num_outputs="4"> | |
302 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
303 <param name="labels" value="yes" /> | |
304 <param name="visu_choice" value="HCPC" /> | |
305 <param name="factor_choice" value="Yes" /> | |
306 <param name="factor" value="factor.tsv" ftype="txt"/> | |
307 <param name="HCPC_method" value="average"/> | |
308 <param name="HCPC_metric" value="manhattan"/> | |
309 <param name="HCPC_npc" value="4" /> | |
310 <output name="pdf_out" file="hcpc.3.pdf" ftype="pdf"/> | |
311 <output name="HCPC_mutual_info" file="hcpc.extval.1.txt" ftype="txt"/> | |
312 <output name="HCPC_cell_clust" file="hcpc.cell-cluster.3.tsv" ftype="tabular"/> | |
313 <output name="HCPC_contributions" file="hcpc.component-impact.3.tsv" ftype="tabular"/> | |
314 </test> | |
315 <test expect_num_outputs="3"> | |
316 <param name="input" value="cpm_input.tsv" ftype="txt"/> | |
317 <param name="labels" value="no" /> | |
318 <param name="visu_choice" value="HCPC" /> | |
319 <param name="HCPC_method" value="single"/> | |
320 <param name="HCPC_metric" value="euclidean"/> | |
321 <param name="HCPC_npc" value="4" /> | |
322 <param name="HCPC_clusterCA" value="cols" /> | |
323 <output name="pdf_out" file="hcpc.4.pdf" compare="sim_size" ftype="pdf"/> | |
324 <output name="HCPC_cell_clust" file="hcpc.cell-cluster.4.tsv" ftype="tabular"/> | |
325 <output name="HCPC_contributions" file="hcpc.component-impact.4.tsv" ftype="tabular"/> | |
383 </test> | 326 </test> |
384 </tests> | 327 </tests> |
385 <help> | 328 <help> |
386 | 329 |
387 **What it does** | 330 **What it does** |
388 | 331 |
389 Takes as an input a matrix of n observations (columns, generally n RNAseq library) of k variables | 332 **Inputs** |
390 (rows, generally k genes). | 333 Takes as input a tab-separated values (TSV) file of n observations (columns, |
334 generally n RNAseq libraries) of k variables (rows, generally k genes). | |
335 | |
336 The table must contain a header, i.e. the first line describes the content of each column. | |
391 | 337 |
392 k variables define a space of k dimensions. Any observation | 338 k variables define a space of k dimensions. Any observation |
393 of k expression values for k genes (the purpose of one RNAseq experiment) can be assigned | 339 of k expression values for k genes (the purpose of one RNAseq experiment) can be assigned |
394 to a position in the k-dim space, of coordinates c1, c2, c3, ..., ck. | 340 to a position in the k-dim space, of coordinates c1, c2, c3, ..., ck. |
395 | 341 |
396 Since visualisation in more than 3 dimensions is not easy for a human being, there is | 342 Since visualisation in more than 3 dimensions is not easy for a human being, there is |
397 a number of methods to "reduce" or "project" a k-dim space in a space of 2 or 3 dimensions. | 343 a number of methods to "reduce" or "project" a k-dim space in a space of 2 or 3 dimensions. |
398 This is of great help, not only to summarise the data, but also to find similarities, common trends | 344 This is of great help, not only to summarise the data, but also to find similarities, common trends |
399 between the data (under the hypothesis that similar data are closer in the k-dimension space). | 345 between the data (under the hypothesis that similar data are closer in the k-dimension space). |
400 | 346 |
347 **Outputs** | |
348 | |
401 This tool returns the visualisation of a dimensional reduction using either: | 349 This tool returns the visualisation of a dimensional reduction using either: |
402 | 350 |
403 * Principal Components Analysis (PCA) | 351 * Principal Components Analysis (PCA) |
404 * Hierarchical Clustering of Principal Components (HCPC) | 352 * Hierarchical Clustering of Principal Components (HCPC) |
405 * t-distributed Stochastic Neighbor Embedding (t-SNE) | 353 * t-distributed Stochastic Neighbor Embedding (t-SNE) |
406 | 354 |
407 The tool returns in addition the table of the coordinates of the observations (eg RNAseq libraries) | 355 If HCPC is used, this tool can also return a 2-column cluster correspondence table: |
408 in the low dim space, which can be used for post-treatment or to further adjust the provided visualisation. | |
409 | |
410 If HCPC is used, this tool can also return the clustering table. It contains two columns of n observations : | |
411 | 356 |
412 * Observation labels | 357 * Observation labels |
413 * Cluster labels | 358 * Cluster labels |
414 | 359 |
415 ** Contrast data with a factor ** | 360 **Contrast data with a factor** |
361 | |
416 The tool offers the possibility to colour data points according to the levels of a factor. | 362 The tool offers the possibility to colour data points according to the levels of a factor. |
417 To use the option "Factor to contrast data", provide a tab-separated, two-column table | 363 To use the option "Factor to contrast data", provide a tab-separated, two-column table |
418 with first column containing the cell/data library identifiers (same identifiers as those | 364 with first column containing the cell/data library identifiers (same identifiers as those |
419 provided as column headers in the input data table) and second column containing the corresponding | 365 provided as column headers in the input data table) and second column containing the corresponding |
420 factor levels value (if this vector is numerical, then the color palette used is quantitative). | 366 factor levels value (if this vector is numerical, then the color palette used is quantitative). |
421 This table does not need to be sorted in the same order as in the data | 367 This table does not need to be sorted in the same order as in the data |
422 table. It may also contain more identifiers than those provided in the data table. | 368 table. It may also contain more identifiers than those provided in the data table. |
423 | 369 |
424 If HCPC visualisation and contrasting data are chosen, an additional text file is given. It contains | 370 If HCPC visualisation is chosen and a contrasting factor is provided, a text file containing metrics |
425 several metrics of external validation of clustering. It will compare the capacity of HCPC clustering | 371 of external validation of the clustering is returned. |
426 to recreate classes contained in the factor data file. If the contrasting factor is quantitative, | 372 These metrics measure the capacity of HCPC clustering to find classes overlapping the levels |
427 the file will be empty. | 373 of the provided factor. |
428 | |
429 | 374 |
430 </help> | 375 </help> |
431 <citations> | 376 <citations> |
432 <citation type="bibtex">@Article{, | 377 <citation type="bibtex">@Article{, |
433 title = {Visualizing High-Dimensional Data Using t-SNE}, | 378 title = {Visualizing High-Dimensional Data Using t-SNE}, |