comparison qualitymetrics_config.xml @ 0:b4f5b5bc01dd draft

planemo upload for repository https://github.com/workflow4metabolomics/qualitymetrics.git commit 73366dd3473c509341ab9ba1df8ba748d08a50a1
author ethevenot
date Sat, 06 Aug 2016 12:01:17 -0400
parents
children 6d3b7b6573d8
comparison
equal deleted inserted replaced
-1:000000000000 0:b4f5b5bc01dd
1 <tool id="quality_metrics" name="Quality Metrics" version="2.2.4">
2 <description>Metrics and graphics to check the quality of the data</description>
3
4 <requirements>
5 <requirement type="package" version="3.2.2">R</requirement>
6 <requirement type="package">r-batch</requirement>
7 <requirement type="package" version="1.4.2">bioconductor-ropls</requirement>
8 </requirements>
9
10 <stdio>
11 <exit_code range="1:" level="fatal" />
12 </stdio>
13
14 <command><![CDATA[
15 Rscript $__tool_directory__/qualitymetrics_wrapper.R
16 dataMatrix_in "$dataMatrix_in"
17 sampleMetadata_in "$sampleMetadata_in"
18 variableMetadata_in "$variableMetadata_in"
19
20 CV "${CV_condition.CV}"
21 #if str($CV_condition.CV ) == 'TRUE':
22 Compa "${CV_condition.Compa}"
23 seuil "${CV_condition.seuil}"
24 #else:
25 Compa "TRUE"
26 seuil "1"
27 #end if
28
29 #if $advPar.optC == "full"
30 poolAsPool1L "$advPar.poolAsPool1L"
31 #else:
32 poolAsPool1L "TRUE"
33 #end if
34
35 sampleMetadata_out "$sampleMetadata_out"
36 variableMetadata_out "$variableMetadata_out"
37 figure "$figure"
38 information "$information"
39 ]]></command>
40
41 <inputs>
42 <param name="dataMatrix_in" type="data" label="Data matrix file" help="" format="tabular" />
43 <param name="sampleMetadata_in" type="data" label="Sample metadata file" help="" format="tabular" />
44 <param name="variableMetadata_in" type="data" label="Variable metadata file" help="" format="tabular" />
45
46 <conditional name="CV_condition">
47 <param name="CV" type="select" label="Coefficient of Variation" help="">
48 <option value="FALSE">no</option>
49 <option value="TRUE">yes</option>
50 </param>
51 <when value="TRUE">
52 <param name="Compa" label="Which type of CV calculation should be done" type="select" display="radio" help="">
53 <option value="TRUE">ratio between pool and sample CVs</option>
54 <option value="FALSE">only pool CV</option>
55 </param>
56 <param name="seuil" type="float" label="Threshold" value="1.25" min="0.0000000000000001" help="if comparing pool and sample CVs, corresponds to the max ratio tolerated (basically between 1.0 and 1.25) ; else corresponds to the max pool CV tolerated (basically 0.3)"/>
57 </when>
58 <when value="FALSE">
59 <param name="Compa" type="hidden" value="TRUE"/>
60 <param name="seuil" type="hidden" value="1"/>
61 </when>
62 </conditional>
63
64 <conditional name="advPar">
65 <param name="optC" type="select" label="Advanced parameters" >
66 <option value="default" selected="true">Use default</option>
67 <option value="full">Full parameter list</option>
68 </param>
69 <when value="full">
70 <param name="poolAsPool1L" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" label="Use 'pool' samples as 'pool1' when computing the correlation with dilution?"/>
71 </when>
72 <when value="default">
73 <param name="poolAsPool1L" type="hidden" value="TRUE"/>
74 </when>
75 </conditional>
76
77 </inputs>
78
79 <outputs>
80 <data name="sampleMetadata_out" label="${tool.name}_${sampleMetadata_in.name}" format="tabular" ></data>
81 <data name="variableMetadata_out" label="${tool.name}_${variableMetadata_in.name}" format="tabular" ></data>
82 <data name="figure" label="${tool.name}_figure.pdf" format="pdf"/>
83 <data name="information" label="${tool.name}_information.txt" format="txt"/>
84 </outputs>
85
86 <tests>
87 <test>
88 <param name="dataMatrix_in" value="input-dataMatrix.tsv"/>
89 <param name="sampleMetadata_in" value="input-sampleMetadata.tsv"/>
90 <param name="variableMetadata_in" value="input-variableMetadata.tsv"/>
91 <param name="CV" value="FALSE"/>
92 <param name="optC" value="default"/>
93 <output name="sampleMetadata_out" file="output-sampleMetadata.tsv"/>
94 <output name="variableMetadata_out" file="output-variableMetadata.tsv"/>
95 </test>
96 </tests>
97
98 <help>
99
100 .. class:: infomark
101
102 **Authors** Marion Landi, Melanie Petera and Etienne Thevenot (W4M Core Development Team)
103
104 ---------------------------------------------------
105
106 .. class:: infomark
107
108 **Tool updates**
109
110 See the **NEWS** section at the bottom of this page
111
112 ---------------------------------------------------
113
114 .. class:: infomark
115
116 **References**
117
118 | Thevenot EA., Roux A., Xu Y., Ezan E., and Junot C. (2015). Analysis of the human adult urinary metabolome variations with age, body mass index and gender by implementing a comprehensive workflow for univariate and OPLS statistical analyses. *Journal of Proteome Research*, **14**:3322-3335 (http://dx.doi.org/10.1021/acs.jproteome.5b00354)
119 | Mason R., Tracy N. and Young J. (1997). A practical approach for interpreting multivariate T2 control chart signals. *Journal of Quality Technology*, **29**:396-406.
120 | Alonso A., Julia A., Beltran A., Vinaixa M., Diaz M., Ibanez L., Correig X. and Marsal S. (2011). AStream: an R package for annotating LC/MS metabolomic data. *Bioinformatics*, **27**:1339-1340. (http://dx.doi.org/10.1093/bioinformatics/btr138)
121
122 ---------------------------------------------------
123
124 ========================
125 Quality Metrics
126 ========================
127
128 -----------
129 Description
130 -----------
131
132 | The **Quality Metrics** tool provides quality metrics of the samples and variables, and visualization of the data matrix
133 | The optional *Coefficient of Variation* argument makes it possible to flag the variables with a pool CV (or a pool CV over sample CV ratio) above a specific threshold
134 | The advanced *PoolAsPool1* argument is used when correlations with pool dilutions are computed: When set to TRUE [default], samples indicated as "pool" will be considered as "pool1" for the correlation together with the other pool dilutions (e.g. "pool2", "pool4", etc.); otherwise, "pool" samples will not be considered to compute the correlation (this enables the experimenter to have distinct "pool" samples for the computation of CV and "pool1" samples for the computation of dilution)
135 | The **sampleMetadata** is returned as output with 3 additional columns containing the p-values for the Hotelling's T2 test and for the Z-scores of the intensity deciles and of the proportion of missing values
136 | The **variableMetadata** is returned as output; in case a **sampleType** column is included in the input sampleMetadata file, additional columns will be added to indicate the variable quality metrics (eg mean, sd, CV on 'pool', 'sample' or 'blank', or correlation with pool dilutions, depending on the known type present in the 'sampleType' column)
137 | A **figure** is generated (pdf file) which illustrates the main computed sample and variable metric values
138
139
140
141 -----------------
142 Workflow position
143 -----------------
144
145 .. image:: QualityControl.png
146 :width: 800
147
148
149
150 -----------
151 Input files
152 -----------
153
154 +----------------------------+---------+
155 | Parameter : num + label | Format |
156 +============================+=========+
157 | 1 : Data matrix file | tabular |
158 +----------------------------+---------+
159 | 2 : Sample metadata file | tabular |
160 +----------------------------+---------+
161 | 3 : Variable metadata file | tabular |
162 +----------------------------+---------+
163
164 ----------
165 Parameters
166 ----------
167
168 Data matrix
169 | contains the intensity values of the variables.
170 |
171
172 Sample metadata file
173 | contains the metadata of the samples; in particular
174 | when the 'sampleType' column is available, with known types such as 'blank', 'sample', 'pool', 'poolN' (where N is a dilution factor of the pool), metrics will be computed (eg mean, sd, CV, correlation with the dilution factor, etc) for each variable (see the 'PoolAsPool1' argument below)
175 | 'pool' (and 'sample') should be present in the 'sampleType' column when setting the 'Coefficient of Variation' parameter to 'yes'
176 |
177
178 Variable metadata file
179 | contains variable information.
180 |
181
182 Note:
183 | Required formats for the dataMatrix, sampleMetadata, and variableMetadata files are described in the **HowTo** entitled 'Format Data For Postprocessing' available on the main page of Workflow4Metabolomics.org; the formats of the 3 tables can be further checked with the **Check Format** module
184 |
185
186 Coefficient of Variation
187 | If 'yes' (not default): variables are classed according to the Coefficient of Variation (CV)
188 | i.e.: CV of pools (and CV of samples if needed) are calculated and compared to a defined threshold;
189 | then variables are classed with a 0/1 coding.
190 |
191
192 Which type of CV calculation should be done (only if CV=yes)
193 | Type of CV comparison that will be used.
194 | 'ratio between pool and sample CVs' **OR** 'only pool CV'
195 |
196
197 Threshold (only if CV=yes)
198 | If comparing pool and sample CVs, corresponds to the max ratio tolerated (basically between 1.0 and 1.25).
199 | Else corresponds to the max pool CV tolerated (basically 0.3).
200 |
201
202 PoolAsPool1 (Advanced parameter)
203 | If 'poolN' (where N is a dilution factor) sample types are present in the 'sampleType' column of the sample metadata file, the Pearson correlation of the intensity with the dilution factor is computed for each variable; the 'PoolAsPool1' parameter indicates whether samples of 'pool' types should be considered as 'pool1' (and hence included in the computation of dilution correlations); default is TRUE
204
205
206 ------------
207 Output files
208 ------------
209
210
211 sampleMetadata.tabular
212 | tsv output
213 | 3 additional columns have been added to the input sampleMetadata file and contain the **p-values** of
214 | 1) the **Hotelling's T2** test in the first plane of the principal components (Mason et al, 1997)
215 | 2) the **Z-score** of **intensity deciles** (Alonso et al, 2011)
216 | 3) the **Z-score** of the proportion of **missing values** (Alonso et al, 2011)
217 | for each test, low p-values indicate samples with extreme behaviour
218 |
219
220 variableMetadata.tabular
221 | tsv output
222 | When the type of samples is available (ie the **sampleType** column is included in the input sampleMetadata file), variable metrics are computed: **sample**, **pool**, and **blank** **mean**, **sd** and **CV** (if the corresponding types are present in the 'sampleType' column), as well as **'blank' mean / 'sample' mean**, and **'pool' CV / 'sample' CV ratio**
223 | If pool dilutions have been used and are indicated in the 'sampleType' column as **poolN** where N is an integer indicating the dilution factor (eg **pool2** for a two-fold dilution of the pool; note that the non-diluted pool remains indicated as 'pool') the Pearson **correlation** (and corresponding p-value) between the intensity and the dilution factor is computed for each variable.
224 | When the **Coefficient of Variation** argument is set to 'yes', the variableMetadata begins with 2 (or 3) columns indicating the pool CV (and the sample CV) and whether the pool CV (or the ratio between pool CV and sample CV) is above the selected threshold
225 |
226
227 figure.pdf
228 | Figure summarizing the various values of the computed metrics and tests; includes several visualizations of the samples (eg, PCA scores) and intensities (eg, image of the data matrix)
229 |
230
231 information.txt
232 | Text file with information regarding the metrics computed, eg those depending on the availability of the 'sampleType' column, and specific types such as 'sample', 'pool', pool dilutions ('poolN'), or 'blank'
233 |
234
235
236 ---------------------------------------------------
237
238 ---------------
239 Working example
240 ---------------
241
242 |
243
244 .. class:: infomark
245
246 See the **W4M00001b_sacurine-complete** shared history in the **Shared Data/Published Histories** menu (https://galaxy.workflow4metabolomics.org/history/list_published)
247
248 ---------------------------------------------------
249
250 ----
251 NEWS
252 ----
253
254 CHANGES IN VERSION 2.2.4
255 ========================
256
257 Additional running and installation tests added with planemo, conda, and travis
258
259 CHANGES IN VERSION 2.2.3
260 ========================
261
262 INTERNAL MODIFICATIONS
263
264 Modifications of the **qualitymetrics_script.R** file to handle the recent **ropls** package versions (i.e. 1.3.15 and above) which use S4 classes
265
266 Creating tests for the R code
267
268 CHANGES IN VERSION 2.2.2
269 ========================
270
271 Minor internal changes
272
273
274 </help>
275
276 <citations>
277 <citation type="doi">10.1021/acs.jproteome.5b00354</citation>
278 <citation type="doi">10.1093/bioinformatics/btr138</citation>
279 <citation type="bibtex">@Article{Mason1997,
280 Title = {A practical approach for interpreting multivariate T2 control chart signals},
281 Author = {Mason, RL. and Tracy, ND. and Young, JC.},
282 Journal = {Journal of Quality Technology},
283 Year = {1997},
284 Number = {4},
285 Pages = {396-406},
286 Volume = {29},
287 }</citation>
288 <citation type="doi">10.1093/bioinformatics/btu813</citation>
289 </citations>
290
291 <!--
292
293 Input files
294 ===========
295
296 | **To generate the "dataMatrix", "sampleMetadata" and "variableMetadata" files:**
297 | **1) copy/paste the values below in three distinct .txt files**
298 | **2) use the "Get Data" / "Upload File" in the "Tools" (left) panel from the Galaxy page by choosing:**
299 | **Convert spaces to tabs: 'Yes'**
300 |
301
302 **dataMatrix file**::
303
304 dataMatrix QC_4 sam_44 sam_18 sam_23 blk_3 sam_9 sam_22 QC_6 blk_4
305 met_031 5601185.9 4446133.4 4144765.4 3085899.9 NA 6748534.9 5819543.8 3256720.3 NA
306 met_032 4.07 4.08 4.11 4.1 NA 4.04 4.13 4.11 NA
307 met_033 1448205184 1456986135 993364802.3 1162711600 5569143.2 1043559922 1465003454 1052094028 5247494.3
308 met_034 4.11 4.21 4.18 4.1 4.09 4.1 4.14 4.11 4.08
309 met_035 3777580.7 2296751 1890711.7 1767424.6 6567.5 1906253.5 3043253.9 2856958.5 7940.8
310 met_036 4.12 4.21 4.26 4.1 4.11 4.22 4.27 4.12 4.2
311 met_037 4982658.7 3751181.8 4219033.2 2425759.9 NA 11978184.4 4306459.5 3352187 NA
312 met_038 4.45 4.38 4.4 4.4 NA 4.44 4.46 4.32 NA
313 met_039 6658087.7 3231434.7 2932986.5 4098788.3 NA 3691132.6 6108614.4 4541941.9 NA
314 met_040 4.49 4.56 4.48 4.5 NA 4.45 4.54 4.46 NA
315
316 **sampleMetadata file**::
317
318 sampleMetadata injectionOrder batch sampleType
319 QC_4 19 batch1 pool
320 sam_44 20 batch1 sample
321 sam_18 23 batch1 sample
322 sam_23 27 batch1 sample
323 blk_3 31 batch1 blank
324 sam_9 34 batch1 sample
325 sam_22 38 batch1 sample
326 QC_6 42 batch1 pool
327 blk_4 43 batch1 blank
328
329 **variableMetadata file**::
330
331 variableMetadata number
332 met_031 31
333 met_032 32
334 met_033 33
335 met_034 34
336 met_035 35
337 met_036 36
338 met_037 37
339 met_038 38
340 met_039 39
341 met_040 40
342
343 Figure output
344 =============
345
346 | You should obtain with this very simplified dataset the following figure:
347 |
348
349 .. image:: qualitymetrics_workingExampleImage.png
350 :width: 600
351
352 -->
353
354 </tool>