Mercurial > repos > iuc > dimet_pca_plot
comparison dimet_pca_plot.xml @ 0:952808ffa842 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/DIMet commit abca848510cb4ac8d09d95634147626ea578cdf0
author | iuc |
---|---|
date | Tue, 10 Oct 2023 11:51:59 +0000 |
parents | |
children | 23f87b8cc62b |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:952808ffa842 |
---|---|
1 <tool id="dimet_@EXECUTABLE@" name="dimet @TOOL_LABEL@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05"> | |
2 <description> | |
3 Figures of Principal Component Analysis for tracer metabolomics (by DIMet) | |
4 </description> | |
5 <macros> | |
6 <token name="@TOOL_LABEL@">pca plot</token> | |
7 <token name="@EXECUTABLE@">pca_plot</token> | |
8 <import>macros.xml</import> | |
9 </macros> | |
10 <expand macro="requirements"/> | |
11 <!-- Runs `python -m dimet` with Hydra-style `++` overrides. The @INIT_*@ and @REMOVE_CONFIG@ tokens are expanded from macros (presumably macros.xml; not visible here, confirm what they set up). The `analysis` override selects dimet.method.PcaPlotConfig and injects the chosen figure format and impute_values. The metadata, abundances and mean_enrichment dataset keys are passed only when the corresponding input was supplied. figure_path is set to `figures`, the same directory name the `report` output collection discovers from. --> <command detect_errors="exit_code"><![CDATA[ | |
12 @INIT_CONFIG@ | |
13 @INIT_PCA@ | |
14 @INIT_IMPUTE_VALUES@ | |
15 @INIT_CONDITIONS@ | |
16 HYDRA_FULL_ERROR=1 python -m dimet | |
17 -cp '$__new_file_path__/config' | |
18 '++hydra.run.dir=pca-plot' | |
19 '++figure_path=figures' | |
20 '++table_path=tables' | |
21 '++analysis={ | |
22 dataset:{ | |
23 _target_:dimet.data.DatasetConfig, | |
24 name: "Galaxy DIMet run" | |
25 }, | |
26 method:{ | |
27 _target_: dimet.method.PcaPlotConfig, | |
28 label: pca-plot, | |
29 name: "Generate Principal Component Analysis plots", | |
30 pca_split_further:['timepoint'], | |
31 draw_ellipses: null, | |
32 run_iris_demo: false, | |
33 color: condition, | |
34 style: timepoint, | |
35 figure_format:${output_options.figure_format}, | |
36 impute_values:${impute_values} | |
37 }, | |
38 label: pca-plot | |
39 }' | |
40 '++analysis.dataset.subfolder=' | |
41 '++analysis.dataset.label=' | |
42 '++analysis.dataset.conditions=${conds}' | |
43 #if $metadata_path: | |
44 '++analysis.dataset.metadata=metadata' | |
45 #end if | |
46 #if $abundance_file: | |
47 '++analysis.dataset.abundances=abundance' | |
48 #end if | |
49 #if $me_or_frac_contrib_file: | |
50 '++analysis.dataset.mean_enrichment=me_or_frac_contrib' | |
51 #end if | |
52 @REMOVE_CONFIG@ | |
53 | |
54 ]]></command> | |
55 <inputs> <!-- Tool form: data inputs and condition selection come from macros; output_options holds the figure format read by the command as ${output_options.figure_format}. --> | |
56 <expand macro="input_parameters_pca"/> | |
57 <expand macro="conditions"/> | |
58 <section name="output_options" title="Output options"> | |
59 <param name="figure_format" type="select" display="radio" label="Select output figure format" help="Please enter at max 1 format"> | |
60 <option value="pdf" selected="true">Pdf</option> <!-- Default format: a select parameter takes its default from option/@selected, not from a value attribute on param. --> | |
61 <option value="svg">Svg</option> | |
62 </param> | |
63 </section> | |
64 </inputs> | |
65 | |
66 <outputs> <!-- Single list collection `report`, populated at runtime from the files discovered in the `figures` directory. Per the Galaxy tool XML documentation, the `__designation_and_ext__` pattern takes each element's identifier and datatype from the file name and its extension. --> | |
67 <collection name="report" type="list"> | |
68 <discover_datasets pattern="__designation_and_ext__" directory="figures"/> | |
69 </collection> | |
70 </outputs> | |
71 <tests> | |
72 <test> <!-- Abundances plus metadata only (no enrichment file), conditions Control and L-Cycloserine, SVG output. Expects 18 figures: the var, label-n_pc and label-y_pc plots for each of the cell and med compartments, once globally and once per timepoint (T0, T2h). Files are compared by size (sim_size, delta 100); presumably because rendered SVG is not byte-stable, confirm. --> | |
73 <param name="abundance_file" ftype="tabular" value="rawAbundances.csv" /> | |
74 <param name="metadata_path" ftype="tabular" value="example2_metadata.csv"/> | |
75 <param name="conditions" value='Control,L-Cycloserine'/> | |
76 <section name="output_options"> | |
77 <param name="figure_format" value="svg"/> | |
78 </section> | |
79 <output_collection name="report" type="list" count="18"> | |
80 <element file="abundances--T0--cell_var.svg" name="abundances--T0--cell_var" ftype="svg" compare="sim_size" delta="100"/> | |
81 <element file="abundances--T0--cell--label-n_pc.svg" name="abundances--T0--cell--label-n_pc" ftype="svg" compare="sim_size" delta="100"/> | |
82 <element file="abundances--T0--cell--label-y_pc.svg" name="abundances--T0--cell--label-y_pc" ftype="svg" compare="sim_size" delta="100"/> | |
83 <element file="abundances--T0--med_var.svg" name="abundances--T0--med_var" ftype="svg" compare="sim_size" delta="100"/> | |
84 <element file="abundances--T0--med--label-n_pc.svg" name="abundances--T0--med--label-n_pc" ftype="svg" compare="sim_size" delta="100"/> | |
85 <element file="abundances--T0--med--label-y_pc.svg" name="abundances--T0--med--label-y_pc" ftype="svg" compare="sim_size" delta="100"/> | |
86 <element file="abundances--T2h--cell--label-n_pc.svg" name="abundances--T2h--cell--label-n_pc" ftype="svg" compare="sim_size" delta="100"/> | |
87 <element file="abundances--T2h--cell--label-y_pc.svg" name="abundances--T2h--cell--label-y_pc" ftype="svg" compare="sim_size" delta="100"/> | |
88 <element file="abundances--T2h--cell_var.svg" name="abundances--T2h--cell_var" ftype="svg" compare="sim_size" delta="100"/> | |
89 <element file="abundances--T2h--med--label-n_pc.svg" name="abundances--T2h--med--label-n_pc" ftype="svg" compare="sim_size" delta="100"/> | |
90 <element file="abundances--T2h--med--label-y_pc.svg" name="abundances--T2h--med--label-y_pc" ftype="svg" compare="sim_size" delta="100"/> | |
91 <element file="abundances--T2h--med_var.svg" name="abundances--T2h--med_var" ftype="svg" compare="sim_size" delta="100"/> | |
92 <element file="abundances--cell--label-n_pc.svg" name="abundances--cell--label-n_pc" ftype="svg" compare="sim_size" delta="100"/> | |
93 <element file="abundances--cell--label-y_pc.svg" name="abundances--cell--label-y_pc" ftype="svg" compare="sim_size" delta="100"/> | |
94 <element file="abundances--cell_var.svg" name="abundances--cell_var" ftype="svg" compare="sim_size" delta="100"/> | |
95 <element file="abundances--med--label-n_pc.svg" name="abundances--med--label-n_pc" ftype="svg" compare="sim_size" delta="100"/> | |
96 <element file="abundances--med--label-y_pc.svg" name="abundances--med--label-y_pc" ftype="svg" compare="sim_size" delta="100"/> | |
97 <element file="abundances--med_var.svg" name="abundances--med_var" ftype="svg" compare="sim_size" delta="100"/> | |
98 </output_collection> | |
99 </test> | |
100 </tests> | |
101 <help><![CDATA[ | |
102 This module is part of DIMet: Differential analysis of Isotope-labeled targeted Metabolomics data (https://pypi.org/project/DIMet/). | |
103 | |
104 This tool performs the Principal Components Analysis (PCA) on your data, | |
105 generating the figures with the results of the PCA, that is, the scatter-plot of the first two principal components or dimensions (eigenvalues), and the barplot of the percentage of explained variances across all the principal components detected in your data. | |
106 | |
107 The figures in .pdf format are of publication quality, and as they are vector images you can open them and customize their aesthetics with professional image-editing software such as Inkscape, Adobe Illustrator, Sketch, CorelDRAW, etc. | |
108 | |
109 | |
110 **Input data files** | |
111 | |
112 This tool takes at most 3 tab-delimited .csv files as inputs. There are two types of files: | |
113 | |
114 - The measures' (or quantifications') files, that can be of 4 types. | |
115 | |
116 - The metadata, a unique file with the description of the samples in your measures' files. This is compulsory. | |
117 | |
118 For running DIMet @EXECUTABLE@ you need **at least one file** of measures: | |
119 | |
120 - The total **abundances** (of the metabolites) file | |
121 | |
122 - The mean **enrichment** or labelled fractional contributions | |
123 | |
124 | |
125 and one metadata file, WHICH IS COMPULSORY, see section **Metadata File Information**. | |
126 | |
127 | |
128 **Measures' files** | |
129 | |
130 The measures' files must be organized as matrices: | |
131 | |
132 - The first column must contain Metabolite IDs that are unique (not repeated) within the file. | |
133 | |
134 - The rest of the columns correspond to the samples | |
135 | |
136 - The rows correspond to the metabolites | |
137 | |
138 - The values must be tab separated, with the first row containing the sample/column labels. | |
139 | |
140 See the following examples of measures files: | |
141 | |
142 | |
143 Example - Metabolites **abundances**: | |
144 | |
145 =============== ================== ================== ================== ================== ================== ================== | |
146 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06** | |
147 =============== ================== ================== ================== ================== ================== ================== | |
148 2_3-PG 8698823.9926 10718737.7217 10724373.9 8536484.5 22060650 28898956 | |
149 2-OHGLu 36924336 424336 92060650 45165 84951950 965165051 | |
150 Glc6P 2310 2142 2683 1683 012532068 1252172 | |
151 Gly3P 399298 991656565 525195 6365231 89451625 4952651963 | |
152 IsoCit 0 0 0 84915613 856236 954651610 | |
153 =============== ================== ================== ================== ================== ================== ================== | |
154 | |
155 Example - mean **enrichment** or labeled fractional contributions: | |
156 | |
157 =============== ================== ================== ================== ================== ================== ================== | |
158 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06** | |
159 =============== ================== ================== ================== ================== ================== ================== | |
160 2_3-PG 0.9711 0.968 0.9909 0.991 0.40 0.9 | |
161 2-OHGLu 0.01719 0.0246 0.554 0.555 0.73 0.68 | |
162 Glc6P 0.06 0.66 2683 0.06 2068 2172 | |
163 Gly3P 0.06 0.06 0.06 1 5 3 | |
164 IsoCit 0.06 1 0.49 0.36 6 10 | |
165 =============== ================== ================== ================== ================== ================== ================== | |
166 | |
167 | |
168 **Metadata File Information** | |
169 | |
170 Provide a tab-separated file that has the names of the samples in the first column and one header row. | |
171 Column names must be exactly in this order: | |
172 | |
173 name_to_plot | |
174 condition | |
175 timepoint | |
176 timenum | |
177 compartment | |
178 original_name | |
179 | |
180 | |
181 Example **Metadata File**: | |
182 | |
183 | |
184 ==================== =============== ============= ============ ================ ================= | |
185 **name_to_plot** **condition** **timepoint** **timenum** **compartment** **original_name** | |
186 ==================== =============== ============= ============ ================ ================= | |
187 Control_cell_T0-1 Control T0 0 cell MCF001089_TD01 | |
188 Control_cell_T0-2 Control T0 0 cell MCF001089_TD02 | |
189 Control_cell_T0-3 Control T0 0 cell MCF001089_TD03 | |
190 Tumoral_cell_T0-1 Tumoral T0 0 cell MCF001089_TD04 | |
191 Tumoral_cell_T0-2 Tumoral T0 0 cell MCF001089_TD05 | |
192 Tumoral_cell_T0-3 Tumoral T0 0 cell MCF001089_TD06 | |
193 Tumoral_cell_T24-1 Tumoral T24 24 cell MCF001089_TD07 | |
194 Tumoral_cell_T24-2 Tumoral T24 24 cell MCF001089_TD08 | |
195 Tumoral_cell_T24-3 Tumoral T24 24 cell MCF001090_TD01 | |
196 Control_med_T24-1 Control T24 24 med MCF001090_TD02 | |
197 Control_med_T24-2 Control T24 24 med MCF001090_TD03 | |
198 Tumoral_med_T24-1 Tumoral T24 24 med MCF001090_TD04 | |
199 Tumoral_med_T24-2 Tumoral T24 24 med MCF001090_TD05 | |
200 Control_med_T0-1 Control T0 0 med MCF001090_TD06 | |
201 Tumoral_med_T0-1 Tumoral T0 0 med MCF001090_TD07 | |
202 Tumoral_med_T0-2 Tumoral T0 0 med MCF001090_TD08 | |
203 ==================== =============== ============= ============ ================ ================= | |
204 | |
205 | |
206 The column **original_name** must have the names of the samples as given in your data. | |
207 | |
208 The column **name_to_plot** must have the names as you want them to be (or set identical to original_name if you prefer). Setting names that | |
209 are meaningful is the better choice, as they are used to display the results. | |
210 | |
211 The column **timenum** must contain only the numeric part of the timepoint, for example 2, 0, 10, 100 (that is, without letters ("T", "t", "s", "h", etc.) | |
212 nor any other symbol). Make sure these time numbers are in the same units (but do not write the units here!). | |
213 | |
214 The column **compartment** is an abbreviation, coined by you, for the compartments. This will be used for the results' files names: the longer the | |
215 compartments names are, the longer the output files' names! Please pick short and clear abbreviations to fill this column. | |
216 | |
217 | |
218 **Running the analysis** | |
219 | |
220 You can specify how you want your analysis to be executed; hints next to the parameters will guide you. | |
221 | |
222 Our tool automatically processes the entirety of your data (global PCA plots), and also splits your data by timepoint to generate PCA plots by timepoint (which is convenient to explore the "grouping" of conditions), but if you only have one condition you can discard them. | |
223 | |
224 The output, for the global PCA, and for each timepoint PCA, consists of : | |
225 | |
226 - a scatter-plot of the first two Principal Components (PC), with labels corresponding to 'name_to_plot'. | |
227 | |
228 - the same scatter-plot as above but without labels. | |
229 | |
230 - a bar-plot of the percentage of explained variances. | |
231 | |
232 | |
233 | |
234 **Available data for testing** | |
235 | |
236 You can test our tool with the data from our manuscript https://zenodo.org/record/8378887 (the pertinent | |
237 files for you are located in the subfolders inside the data folder). | |
238 You can also use the minimal data examples from https://zenodo.org/record/8380706 | |
239 | |
240 ]]> | |
241 </help> | |
242 <expand macro="citations" /> | |
243 </tool> |