3
|
1 <tool id="abims_xcms_group" name="xcms.group" version="2.0.4">
|
0
|
2
|
|
3 <description>Group peaks together across samples using overlapping m/z bins and calculation of smoothed peak distributions in chromatographic time.</description>
|
|
4
|
|
5 <requirements>
|
|
6 <requirement type="package" version="3.1.2">R</requirement>
|
|
7 <requirement type="binary">Rscript</requirement>
|
|
8 <requirement type="package" version="1.44.0">xcms</requirement>
|
3
|
9 <requirement type="package" version="2.2.0">xcms_w4m_script</requirement>
|
0
|
10 </requirements>
|
|
11
|
|
12 <stdio>
|
|
13 <exit_code range="1:" level="fatal" />
|
|
14 </stdio>
|
|
15
|
3
|
16 <command><![CDATA[
|
0
|
17 xcms.r
|
3
|
18 xfunction group
|
|
19 image $image
|
|
20
|
|
21 xsetRdataOutput $xsetRData
|
|
22 rplotspdf $rplotsPdf
|
|
23
|
|
24 method $methods.method sleep 0.001
|
0
|
25 #if $methods.method == "density":
|
|
26 ## minsamp $methods.minsamp
|
|
27 minfrac $methods.minfrac
|
|
28 bw $methods.bw
|
|
29 mzwid $methods.mzwid
|
|
30 #if $methods.density_options.option == "show":
|
|
31 max $methods.density_options.max
|
|
32 #end if
|
|
33 #elif $methods.method == "mzClust":
|
|
34 mzppm $methods.mzppm
|
|
35 mzabs $methods.mzabs
|
|
36 minfrac $methods.minfrac
|
|
37 ## minsamp $methods.minsamp
|
|
38 #else:
|
|
39 mzVsRTbalance $methods.mzVsRTbalance
|
|
40 mzCheck $methods.mzCheck
|
|
41 rtCheck $methods.rtCheck
|
|
42 kNN $methods.kNN
|
|
43 #end if
|
3
|
44 ;
|
|
45 return=\$?;
|
|
46 mv log.txt $log;
|
|
47 cat $log;
|
|
48 sh -c "exit \$return"
|
|
49
|
|
50 ]]></command>
|
0
|
51
|
|
52 <inputs>
|
|
53 <param name="image" type="data" format="rdata.xcms.raw,rdata.xcms.group,rdata.xcms.retcor,rdata" label="xset RData file" help="output file from another function xcms (xcmsSet, retcor etc.)" />
|
|
54 <conditional name="methods">
|
|
55 <param name="method" type="select" label="Method to use for grouping" help="[method] See the help section below">
|
|
56 <option value="density" selected="true">density</option>
|
|
57 <option value="mzClust" >mzClust</option>
|
|
58 <option value="nearest" >nearest</option>
|
|
59 </param>
|
|
60 <when value="density">
|
|
61 <param name="bw" type="integer" value="30" label="Bandwidth" help="[bw] bandwidth (standard deviation or half width at half maximum) of gaussian smoothing kernel to apply to the peak density chromatogram" />
|
|
62 <param name="minfrac" type="float" value="0.5" label="Minimum fraction of samples necessary" help="[minfrac] in at least one of the sample groups for it to be a valid group" />
|
|
63 <param name="mzwid" type="float" value="0.25" label="Width of overlapping m/z slices" help="[mzwid] to use for creating peak density chromatograms and grouping peaks across samples " />
|
|
64 <!--
|
|
65 <param name="minsamp" type="hidden" value="1" label="minsamp" help="minimum number of samples necessary in at least one of the sample groups for it to be a valid group " />
|
|
66 -->
|
|
67 <conditional name="density_options">
|
|
68 <param name="option" type="select" label="Advanced options">
|
|
69 <option value="show">show</option>
|
|
70 <option value="hide" selected="true">hide</option>
|
|
71 </param>
|
|
72 <when value="show">
|
|
73 <param name="max" type="integer" value="5" label="Maximum number of groups to identify in a single m/z slice" help="[max]" />
|
|
74 </when>
|
|
75 <when value="hide">
|
|
76 </when>
|
|
77 </conditional>
|
|
78
|
|
79 </when>
|
|
80 <when value="mzClust">
|
|
81 <param name="mzppm" type="integer" value="20 " label="Relative error used for clustering/grouping in ppm" help="[mzppm]" />
|
|
82 <param name="mzabs" type="float" value="0" label="Absolute error used for clustering/grouping" help="[mzabs]" />
|
|
83 <param name="minfrac" type="float" value="0" label="Minimum fraction of each class in one bin" help="[minfrac] minimum fraction of samples necessary in at least one of the sample groups for it to be a valid group" />
|
|
84 <!--
|
|
85 <param name="minsamp" type="hidden" value="1" label="minsamp" help="minimum number of samples necessary in at least one of the sample groups for it to be a valid group " />
|
|
86 -->
|
|
87 </when>
|
|
88 <when value="nearest">
|
|
89 <param name="mzVsRTbalance" type="integer" value="10 " label="Multiplicator for mz value before calculating the (euclidean) distance between two peaks." help="[mzVsRTbalance]" />
|
|
90 <param name="mzCheck" type="float" value="0.2" label="Maximum tolerated distance for mz" help="[mzCheck]" />
|
|
91 <param name="rtCheck" type="integer" value="15" label="Maximum tolerated distance for RT" help="[rtCheck]" />
|
|
92 <param name="kNN" type="integer" value="10" label="Number of nearest Neighbours to check" help="[kNN]" />
|
|
93 </when>
|
|
94 </conditional>
|
|
95 <!--
|
|
96 <param name="sleepy" type="float" value="0.001" label="sleep" help="seconds to pause between plotting successive steps of the peak grouping algorithm. peaks are plotted as points showing relative intensity. identified groups are flanked by dotted vertical lines">
|
|
97 <validator type="in_range" message="Must be more than 0" min="0.001" max="inf"/>
|
|
98 </param>
|
|
99 -->
|
|
100
|
|
101 </inputs>
|
|
102
|
|
103 <outputs>
|
|
104 <data name="xsetRData" format="rdata.xcms.group" label="${image.name[:-6]}.group.RData"/>
|
|
105 <data name="rplotsPdf" format="pdf" label="${image.name[:-6]}.group.Rplots.pdf"/>
|
3
|
106 <data name="log" format="txt" label="xset.log.txt" hidden="true" />
|
0
|
107 </outputs>
|
|
108
|
|
109 <tests>
|
|
110 <test>
|
|
111 <param name="image" value="xset.RData"/>
|
|
112 <param name="methods.method" value="density"/>
|
|
113 <param name="methods.bw" value="5"/>
|
|
114 <param name="methods.minfrac" value="0.3"/>
|
|
115 <param name="methods.mzwid" value="0.01"/>
|
|
116 <param name="methods.density_options.option" value="show"/>
|
|
117 <param name="methods.density_options.max" value="50"/>
|
3
|
118 <!--<output name="xsetRData" file="xset.group.RData" />-->
|
|
119 <!--<output name="rplotsPdf" file="xset.group.Rplots.pdf" />-->
|
|
120 <output name="log">
|
|
121 <assert_contents>
|
|
122 <has_text text="object with 9 samples" />
|
|
123 <has_text text="Time range: 0.7-1140 seconds (0-19 minutes)" />
|
|
124 <has_text text="Mass range: 50.0019-999.9863 m/z" />
|
|
125 <has_text text="Peaks: 135846 (about 15094 per sample)" />
|
|
126 <has_text text="Peak Groups: 6642" />
|
|
127 <has_text text="Sample classes: bio, blank" />
|
|
128 </assert_contents>
|
|
129 </output>
|
0
|
130
|
|
131 </test>
|
|
132 </tests>
|
|
133
|
3
|
134 <help><![CDATA[
|
0
|
135
|
|
136 .. class:: infomark
|
|
137
|
|
138 **Authors** Colin A. Smith csmith@scripps.edu, Ralf Tautenhahn rtautenh@gmail.com, Steffen Neumann sneumann@ipb-halle.de, Paul Benton hpaul.benton08@imperial.ac.uk and Christopher Conley cjconley@ucdavis.edu
|
|
139
|
|
140 **Galaxy integration** ABiMS TEAM - UPMC/CNRS - Station biologique de Roscoff and Yann Guitton yann.guitton@univ-nantes.fr - part of Workflow4Metabolomics.org [W4M]
|
|
141
|
|
142 | Contact support@workflow4metabolomics.org for any questions or concerns about the Galaxy implementation of this tool.
|
|
143
|
|
144
|
|
145
|
|
146 ---------------------------------------------------
|
|
147
|
|
148 ==========
|
|
149 Xcms.Group
|
|
150 ==========
|
|
151
|
|
152 -----------
|
|
153 Description
|
|
154 -----------
|
|
155
|
|
156 After peak identification with xcmsSet, this tool groups the peaks which represent the same analyte across samples using overlapping m/z bins and calculation of smoothed peak distributions in chromatographic time. It allows rejection of features that are only partially detected within the replicates of a sample class.
|
|
157
|
|
158
|
|
159
|
|
160 -----------------
|
|
161 Workflow position
|
|
162 -----------------
|
|
163
|
|
164 **Upstream tools**
|
|
165
|
|
166 ========================= ================= =================== ==========
|
|
167 Name                      output file       format              parameter
|
|
168 ========================= ================= =================== ==========
|
|
169 xcms.xcmsSet              xset.RData        rdata.xcms.raw      RData file
|
|
170 ------------------------- ----------------- ------------------- ----------
|
|
171 xcms.retcor               xset.RData        rdata.xcms.retcor   RData file
|
|
172 ========================= ================= =================== ==========
|
|
173
|
|
174
|
|
175 **Downstream tools**
|
|
176
|
|
177 +---------------------------+-----------------+--------------------+
|
|
178 | Name                      | Output file     | Format             |
|
|
179 +===========================+=================+====================+
|
|
180 |xcms.retcor                | xset.RData      | rdata.xcms.group   |
|
|
181 +---------------------------+-----------------+--------------------+
|
|
182 |xcms.fillPeaks             | xset.RData      | rdata.xcms.group   |
|
|
183 +---------------------------+-----------------+--------------------+
|
|
184
|
|
185 The output file is an xcmsSet.RData file. You can continue your analysis using it in the **xcms.retcor** tool as a next step and then in **xcms.fillPeaks**.
|
|
186
|
|
187 **General schema of the metabolomic workflow**
|
|
188
|
|
189 .. image:: xcms_group_workflow.png
|
|
190
|
|
191
|
|
192 -----------
|
|
193 Input files
|
|
194 -----------
|
|
195
|
|
196 +---------------------------+-----------------------+
|
|
197 | Parameter : num + label | Format |
|
|
198 +===========================+=======================+
|
|
199 | 1 : RData file | rdata.xcms.group |
|
|
200 +---------------------------+-----------------------+
|
|
201
|
|
202
|
|
203 ----------
|
|
204 Parameters
|
|
205 ----------
|
|
206
|
|
207 Method to use for grouping
|
|
208 --------------------------
|
|
209
|
|
210 **mzClust**
|
|
211
|
|
212 | Runs high resolution alignment on single spectra samples stored in the RData file generated by the **xcmsSet tool**.
|
|
213
|
|
214 **density**
|
|
215
|
|
216 | Groups peaks together across samples using overlapping m/z bins and calculation of smoothed peak distributions in chromatographic time.
|
|
217
|
|
218 **nearest**
|
|
219
|
|
220 | Groups peaks together across samples by creating a master peak list and assigning corresponding peaks from all samples. It is inspired by the alignment algorithm of mzMine.
|
|
221
|
|
222
|
|
223 ------------
|
|
224 Output files
|
|
225 ------------
|
|
226
|
|
227 xset.group.Rplots.pdf
|
|
228
|
|
229 xset.group.RData: rdata.xcms.group format
|
|
230
|
|
231 | RData file that will be necessary in the third and fourth steps of the workflow (xcms.retcor and xcms.fillPeaks).
|
|
232
|
|
233
|
|
234 ------
|
|
235
|
|
236 .. class:: infomark
|
|
237
|
|
238 The output file is an xset.group.RData file. You can continue your analysis using it in the **xcms.retcor** tool.
|
|
239
|
|
240
|
|
241 ---------------------------------------------------
|
|
242
|
|
243
|
|
244 ---------------
|
|
245 Working example
|
|
246 ---------------
|
|
247
|
|
248 Input files
|
|
249 -----------
|
|
250
|
|
251 | RData file -> **xset.RData**
|
|
252
|
|
253 Parameters
|
|
254 ----------
|
|
255
|
|
256 | Method -> **density**
|
|
257 | bw -> **5**
|
|
258 | minfrac -> **0.3**
|
|
259 | mzwid -> **0.01**
|
|
260 | Advanced options: **show**
|
|
261 | max -> **50**
|
|
262
|
|
263
|
|
264 Output files
|
|
265 ------------
|
|
266
|
|
267 | **1) xset.group.RData: RData file**
|
|
268
|
|
269 | **2) Example of an xset.group.Rplots pdf file**
|
|
270
|
|
271 .. image:: xcms_group.png
|
|
272 :width: 700
|
|
273
|
|
274
|
3
|
275 ---------------------------------------------------
|
2
|
276
|
3
|
277 Changelog/News
|
|
278 --------------
|
|
279
|
|
280 **Version 2.0.4 - 10/02/2016**
|
|
281
|
|
282 - BUGFIX: better management of errors. Datasets remained green although the process failed
|
|
283
|
|
284 - UPDATE: refactoring of internal management of inputs/outputs
|
|
285
|
|
286 - UPDATE: refactoring to feed the new report tool
|
|
287
|
|
288
|
|
289 **Version 2.0.2 - 02/06/2015**
|
|
290
|
|
291 - IMPROVEMENT: new datatype/dataset formats (rdata.xcms.raw, rdata.xcms.group, rdata.xcms.retcor ...) will facilitate the sequence of tools and so avoid incompatibility errors.
|
|
292
|
|
293 - IMPROVEMENT: parameter labels have changed to facilitate their reading.
|
|
294
|
|
295
|
|
296 ]]></help>
|
0
|
297
|
|
298
|
|
299 <citations>
|
|
300 <citation type="doi">10.1021/ac051437y</citation>
|
|
301 <citation type="doi">10.1093/bioinformatics/btu813</citation>
|
|
302 </citations>
|
|
303
|
|
304
|
|
305 </tool>
|