comparison ionflow/ionflow.xml @ 0:3b461dc9568b draft default tip

Uploaded
author metaboflow_cam
date Mon, 09 Aug 2021 09:41:22 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:3b461dc9568b
1 <!--
2 wl-10-08-2020, Mon: commence
3 wl-24-08-2020, Mon: first version
4 wl-16-11-2020, Mon: second version
5 wl-23-03-2021, Tue: third version
6 wl-08-06-2021, Tue: fourth version: new stuff from Jacopo
7 -->
8
9 <tool id="ionfow" name="IonFlow" version="0.4.0">
10 <description>
11 Pipeline for processing and analysis of ionomics data
12 </description>
13 <macros>
14 <import>macros.xml</import>
15 </macros>
16 <expand macro="requirements" />
17 <expand macro="stdio" />
18
19 <!-- =============================================================== -->
20 <command detect_errors="exit_code">
21 <![CDATA[
22 ## The script path is quoted so that install directories containing spaces work.
23 Rscript '${__tool_directory__}/ionflow.R'
24 ## Input and pre-processing options
25 --ion_file '$ion_file'
26 --var_id '$pre.var_id'
27 --batch_id '$pre.batch_id'
28 --data_id '$pre.data_id'
29 --method_norm '$pre.method_norm'
30 --batch_control '$pre.batch.batch_control'
31 #if $pre.batch.batch_control=='yes':
32 --control_lines '$pre.batch.control_lines'
33 --control_use '$pre.batch.control_use'
34 #end if
35 --method_outliers '$pre.method_outliers'
36 --thres_outl '$pre.thres_outl'
37 --stand_method '$pre.stand.stand_method'
38 #if $pre.stand.stand_method=='custom':
39 --std_file '$pre.stand.std_file'
40 #end if
41 --thres_symb '$pre.thres_symb'
42
43 ## Exploratory analysis
44 --thres_ion_corr '$expl.thres_ion_corr'
45
46 ## Clustering analysis
47 --min_clust_size '$clus.min_clust_size'
48 --h_tree '$clus.h_tree'
49 --filter_zero_string '$clus.filter_zero_string'
50
51 ## Enrichment analysis
52 --pval '$enri.pval'
53 --min_count '$enri.min_count'
54 --ont '$enri.ont'
55 --annot_pkg '$enri.annot_pkg'
56
57 ## Network analysis
58 --method_corr '$net.method_corr'
59 --thres_corr '$net.thres_corr'
60
61 ## output: pre-processing
62 --pre_proc_pdf '$pre_proc_pdf'
63 --df_stats_out '$df_stats_out'
64 --outl_out '$outl_out'
65 --data_wide_out '$data_wide_out'
66 --data_wide_symb_out '$data_wide_symb_out'
67
68 ## output: exploratory analysis
69 --expl_anal_pdf '$expl_anal_pdf'
70
71 ## output: clustering analysis
72 --clus_anal_pdf '$clus_anal_pdf'
73
74 ## output: enrichment analysis
75 --go_en_out '$go_en_out'
76
77 ## output: network analysis
78 --gene_net_pdf '$gene_net_pdf'
79 --imbe_out '$imbe_out'
80 ]]>
81 </command>
82
83 <!-- =============================================================== -->
84 <inputs>
85 <param name="ion_file" type="data" format="csv"
86 label="Ion data table"
87 help="Ion data table with columns of Ions and meta information." />
88
89 <!-- start of pre -->
90 <section name="pre" title="Pre Processing" >
91
92 <param name="var_id" type="integer" value="1"
93 label="Specify variable column index of input data"
94 help="Indicate which column will be the variable (ORF or SYMBOL)." />
95
96 <param name="batch_id" type="integer" value="3"
97 label="Specify batch ID column index of input data"
98 help="Indicate which column will be batch ID." />
99
100 <param name="data_id" type="integer" value="5"
101 label="Specify data start column index of input data"
102 help="Indicate which column will be the start of data matrix." />
103
104 <param name="method_norm" type="select"
105 label="Select a method for batch correction">
106 <option value="median" selected="true">Median</option>
107 <option value="median+std">Median plus std</option>
108 <option value="none">None</option>
109 </param>
110
111 <!-- batch control -->
112 <conditional name="batch">
113 <param name="batch_control" type="select"
114 label="Use control lines for batch correction or not" >
115 <option value="yes" selected="true">Yes</option>
116 <option value="no">No</option>
117 </param>
118
119 <when value="yes">
120 <param name="control_lines" type="text" value="BY4741"
121 label="Specify batch control lines (rows)">
122 <sanitizer>
123 <valid initial="string.ascii_letters,string.digits"></valid>
124 </sanitizer>
125 </param>
126
127 <param name="control_use" type="select"
128 label="Select lines for batch correction">
129 <option value="control">Use control lines for batch correction</option>
130 <option value="all" selected="true">Use all lines for batch correction</option>
131 <option value="control.out">Use all lines except control lines for batch correction</option>
132 </param>
133 </when>
134
135 <when value="no">
136 </when>
137 </conditional>
138
139 <param name="method_outliers" type="select"
140 label="Select a method for outlier detection">
141 <option value="IQR">IQR</option>
142 <option value="mad">MAD</option>
143 <option value="log.FC.dist" selected="true">log FC dist</option>
144 <option value="none">none</option>
145 </param>
146
147 <param name="thres_outl" type="float" value="3.0"
148 label="Specify outlier detection threshold" />
149
150 <!-- standardisation method -->
151 <conditional name="stand">
152 <param name="stand_method" type="select"
153 label="Select a method for standardisation">
154 <option value="std" selected="true">STD</option>
155 <option value="mad">MAD</option>
156 <option value="custom">Custom</option>
157 </param>
158 <!-- The std file is only requested, and only passed to ionflow.R, for the custom method -->
159 <when value="custom">
160 <param name="std_file" type="data" format="tabular"
161 label="STD file"
162 help="A data matrix with only two columns. The first
163 column is the names of ions and the second one is std
164 values. " />
165 </when>
166 </conditional>
167
168 <param name="thres_symb" type="float" value="2.0"
169 label="Specify symbolisation threshold" />
170
171 </section>
172 <!-- end of pre -->
173
174 <section name="expl" title="Exploratory analysis" > <!-- forwarded to ionflow.R as thres_ion_corr; range enforced to match the label -->
175 <param name="thres_ion_corr" type="float" value="0.15" min="0" max="1"
176 label="Threshold for Ion correlation (0 - 1)" />
177 </section>
178 <!-- Clustering options, forwarded to ionflow.R as min_clust_size, h_tree and filter_zero_string -->
179 <section name="clus" title="Clustering analysis" >
180 <param name="min_clust_size" type="float" value="10.0"
181 label="Specify minimal cluster center number" />
182 <param name="h_tree" type="float" value="0.0"
183 label="Cutting height for hierarchical clustering" />
184 <param name="filter_zero_string" type="boolean" truevalue="True"
185 falsevalue="False" checked="True"
186 label="Filter the zero string?" />
187 </section>
188
189 <section name="enri" title="Enrichment analysis" > <!-- forwarded to ionflow.R as pval, min_count, ont and annot_pkg -->
190 <param name="pval" type="float" value="0.05" min="0" max="1"
191 label="Specify p-value threshold for enrichment analysis" />
192 <param name="min_count" type="float" value="3.0"
193 label="Minimal count number for enrichment analysis" />
194 <param name="ont" type="select"
195 label="Select gene ontology for GO Terms">
196 <option value="BP" selected="true">BP</option>
197 <option value="MF">MF</option>
198 <option value="CC">CC</option>
199 </param>
200 <param name="annot_pkg" type="select"
201 label="Select an annotation package">
202 <option value="org.Sc.sgd.db" selected="true">Yeast(org.Sc.sgd.db)</option>
203 <option value="org.Hs.eg.db">Human(org.Hs.eg.db)</option>
204 <option value="org.Mm.eg.db">Mouse(org.Mm.eg.db)</option>
205 </param>
206 </section>
207 <!-- Network options, forwarded to ionflow.R as method_corr and thres_corr -->
208 <section name="net" title="Network analysis" >
209 <param name="method_corr" type="select"
210 label="Select a method for similarity measure">
211 <option value="pearson">Pearson</option>
212 <option value="spearman">Spearman</option>
213 <option value="kendall">Kendall</option>
214 <option value="cosine" selected="true">Cosine</option>
215 <option value="mahal_cosine">Mahalanobis Cosine</option>
216 <option value="hybrid_mahal_cosine">Hybrid Mahalanobis Cosine</option>
217 </param>
218 <param name="thres_corr" type="float" value="0.7" min="0" max="1"
219 label="Specify similarity threshold (0 - 1)" />
220 </section>
221
222 </inputs>
223
224
225 <!-- =============================================================== -->
226 <outputs> <!-- one dataset per output path option passed to ionflow.R in the command block -->
227 <data format="pdf" name="pre_proc_pdf"
228 label="Pre-processing plots for Ions on ${on_string}" />
229 <data format="tabular" name="df_stats_out"
230 label="Statistical summary of data set on ${on_string}"/>
231 <data format="tabular" name="outl_out"
232 label="Outlier table on ${on_string}"/>
233 <data format="tabular" name="data_wide_out"
234 label="Pre-processed data in wide format on ${on_string}"/>
235 <data format="tabular" name="data_wide_symb_out"
236 label="Symbolization data in wide format on ${on_string}"/>
237 <data format="pdf" name="expl_anal_pdf"
238 label="Exploratory analysis plots for Ions on ${on_string}" />
239 <data format="pdf" name="clus_anal_pdf"
240 label="Clustering analysis plots for Ions on ${on_string}" />
241 <data format="tabular" name="go_en_out"
242 label="GO enrichment table on ${on_string}"/>
243 <data format="pdf" name="gene_net_pdf"
244 label="Gene network plots on ${on_string}" />
245 <data format="tabular" name="imbe_out"
246 label="Impact and betweenness table on ${on_string}"/>
247 </outputs>
248
249 <!-- =============================================================== -->
250 <tests>
251 <test> <!-- runs the tool on Dataset_IonFlow_Ionome_KO_short.csv with every parameter at its declared default -->
252 <param name="ion_file" value="Dataset_IonFlow_Ionome_KO_short.csv" />
253 <param name="var_id" value="1" />
254 <param name="batch_id" value="3" />
255 <param name="data_id" value="5" />
256 <param name="method_norm" value="median" />
257 <param name="batch_control" value="yes" />
258 <param name="control_lines" value="BY4741" />
259 <param name="control_use" value="all" />
260 <param name="method_outliers" value="log.FC.dist" />
261 <param name="thres_outl" value="3.0" />
262 <param name="stand_method" value="std" />
263 <param name="thres_symb" value="2" />
264 <param name="thres_ion_corr" value="0.15" />
265 <param name="min_clust_size" value="10.0" />
266 <param name="h_tree" value="0.0" />
267 <param name="filter_zero_string" value="true" />
268 <param name="pval" value="0.05" />
269 <param name="min_count" value="3" />
270 <param name="ont" value="BP" />
271 <param name="annot_pkg" value="org.Sc.sgd.db" />
272 <param name="method_corr" value="cosine" />
273 <param name="thres_corr" value="0.7" />
274 <output name="pre_proc_pdf" file="res/pre_proc.pdf" compare="sim_size" />
275 <output name="df_stats_out" file="res/df_stats.tsv" compare="diff" />
276 <output name="outl_out" file="res/outl.tsv" compare="diff" />
277 <output name="data_wide_out" file="res/data_wide.tsv" compare="diff" />
278 <output name="data_wide_symb_out" file="res/data_wide_symb.tsv" compare="diff" />
279 <output name="expl_anal_pdf" file="res/expl_anal.pdf" compare="sim_size" />
280 <output name="clus_anal_pdf" file="res/clus_anal.pdf" compare="sim_size" />
281 <output name="go_en_out" file="res/go_en.tsv" compare="diff" />
282 <output name="gene_net_pdf" file="res/gene_net.pdf" compare="sim_size" />
283 <output name="imbe_out" file="res/impact_betweenness.tsv" compare="diff" />
284 </test>
285 </tests>
286
287 <!-- =============================================================== -->
288 <help>
289 IonFlow Pipeline
290 =================
291
292 Description
293 -----------
294
295 This galaxy tool wraps R package IonFlow_ with modification to process
296 ionomics data to aid reproducible data sharing and big data initiatives.
297
298 The pipeline includes:
299
300 Pre-Processing
301 This procedure performs batch correction with or without control lines,
302 outlier detection and data standardisation. The processed concentration
303 data and a symbolisation profile data are returned for further analysis.
304
305 Exploratory Analysis
306 This procedure performs correlation analysis and PCA analysis in terms of
307 ions. The heatmap with hierarchical clustering and network graph are based
308 on the correlation analysis.
309
310 Clustering Analysis
311 This step performs hierarchical clustering based on symbolised profile.
312 The selected cluster centres (controlled by the threshold of minimal cluster
313 centre number) are applied to enrichment and network analysis.
314
315 Enrichment Analysis
316 This step uses the clustering results for enrichment analysis. It currently supports
317 three annotation packages:
318
319 - Yeast(org.Sc.sgd.db)
320 - Human(org.Hs.eg.db)
321 - Mouse(org.Mm.eg.db)
322
323 Network Analysis
324 This part uses hierarchical clustering of the symbolised profile
325 for network analysis. Genes with correlation coefficient larger than a
326 threshold are then used. Some network analysis stats such as impact and
327 betweenness are also returned.
328
329 .. _IonFlow: https://github.com/AlinaPeluso/MetaboFlow
330
331 Inputs
332 ------
333
334 Ionomics data
335 ~~~~~~~~~~~~~
336
337 The input file is an ionomics data table in tabular (CSV) format. The following is
338 an example in which the first two columns hold the knockout name and batch ID and
339 the other columns are ion data. To use this input data in ``Pre-processing``, the
340 user must indicate ``var_id`` as ``1`` (``Knockout``), ``batch_id`` as ``2``
341 (``Batch_ID``) and ``data_id`` as ``3``. If the first column contains the batch
342 control lines, name them in ``control_lines``. For
343 example, if ``YDL227C`` is to be used as batch control, set ``control_lines =
344 "YDL227C"``.
345
346
347 +----------+----------+---------+-------+-------+--------+----------+---------+-------+---------+-------+----------+----------+
348 | Knockout | Batch_ID | Ca | Cd | Cu | Fe | K | Mg | Mo | Na | Ni | P | S |
349 +----------+----------+---------+-------+-------+--------+----------+---------+-------+---------+-------+----------+----------+
350 | YDL227C | 14 | 59.549 | 0.953 | 2.202 | 10.942 | 3448.070 | 693.992 | 1.603 | 259.816 | 1.573 | 4963.315 | 556.397 |
351 +----------+----------+---------+-------+-------+--------+----------+---------+-------+---------+-------+----------+----------+
352 | YDL227C | 14 | 62.258 | 0.927 | 2.067 | 26.262 | 3493.741 | 705.008 | 2.691 | 273.640 | 4.443 | 4874.101 | 553.229 |
353 +----------+----------+---------+-------+-------+--------+----------+---------+-------+---------+-------+----------+----------+
354 | YDL227C | 14 | 65.075 | 0.875 | 2.048 | 10.244 | 3317.611 | 691.411 | 1.878 | 278.167 | 1.448 | 4608.300 | 535.609 |
355 +----------+----------+---------+-------+-------+--------+----------+---------+-------+---------+-------+----------+----------+
356 | YDL227C | 14 | 56.886 | 0.985 | 2.203 | 9.206 | 3330.854 | 702.734 | 1.396 | 268.609 | 1.640 | 5119.736 | 546.230 |
357 +----------+----------+---------+-------+-------+--------+----------+---------+-------+---------+-------+----------+----------+
358
359 |
360
361 Custom STD data
362 ~~~~~~~~~~~~~~~
363
364 Standard deviation values can be provided by the user if the standardisation method
365 ``stand_method`` in the pre-processing procedure is set to ``custom``. The
366 user-defined std file has tabular format such as:
367
368 +------+----------+
369 | Ion | sd |
370 +------+----------+
371 | Ca | 0.150 |
372 +------+----------+
373 | Fe | 0.163 |
374 +------+----------+
375 | K | 0.094 |
376 +------+----------+
377 | Mg | 0.059 |
378 +------+----------+
379 | Na | 0.107 |
380 +------+----------+
381 | Ni | 0.078 |
382 +------+----------+
383 | Zn | 0.067 |
384 +------+----------+
385
386 |
387
388 Outputs
389 -------
390
391 Pre-processing
392 ~~~~~~~~~~~~~~
393
394 The output includes:
395
396 - A PDF file for the plots: dot-plot with ion vs batch and distribution
397 plot of ions.
398 - A tabular file for statistical summary of data set
399 - A tabular file for outlier table
400 - A tabular file for processed data set
401 - A tabular file for the symbolisation data set
402
403 Exploratory analysis
404 ~~~~~~~~~~~~~~~~~~~~
405
406 A single PDF file with plots:
407
408 - Correlation map
409 - Heatmap with dendrogram
410 - PCA plot
411 - Correlation network graph
412
413 Clustering analysis
414 ~~~~~~~~~~~~~~~~~~~
415
416 A single PDF file with clustering plots.
417
418 Enrichment analysis
419 ~~~~~~~~~~~~~~~~~~~
420
421 A tabular file for GO Terms enrichment table.
422
423 Network analysis
424 ~~~~~~~~~~~~~~~~
425
426 Two files are returned:
427
428 - A PDF file for plots
429 - network graph
430 - impact scatter plot
431 - A tabular file for impact and betweenness table
432
433
434 </help>
435 <citations>
436 </citations>
437 </tool>