comparison w4mclassfilter.xml @ 4:499c7ecfa834 draft

planemo upload for repository https://github.com/HegemanLab/w4mclassfilter_galaxy_wrapper/tree/master commit 7049f74a86f6e47565a68336d6496d112713cbba
author eschen42
date Mon, 19 Jun 2017 23:42:33 -0400
parents 191a720488ce
children 2cdf7d5982c8
comparison
equal deleted inserted replaced
3:191a720488ce 4:499c7ecfa834
1 <tool id="w4mclassfilter" name="Sample_Subset" version="0.98.1"> 1 <tool id="w4mclassfilter" name="Sample_Subset" version="0.98.2">
2 <description>Filter W4M data by sample class</description> 2 <description>Filter W4M data by sample class</description>
3 3
4 <requirements> 4 <requirements>
5 <requirement type="package" version="3.3.1">r-base</requirement> 5 <requirement type="package" version="3.3.1">r-base</requirement>
6 <requirement type="package" version="1.1_4">r-batch</requirement> 6 <requirement type="package" version="1.1_4">r-batch</requirement>
7 <requirement type="package" version="0.98.1">w4mclassfilter</requirement> 7 <requirement type="package" version="0.98.2">w4mclassfilter</requirement>
8 </requirements> 8 </requirements>
9 9
10 <stdio> 10 <stdio>
11 <exit_code range="1:" level="fatal" /> 11 <exit_code range="1:" level="fatal" />
12 </stdio> 12 </stdio>
13 13
14 14
15 <command detect_errors="aggressive"><![CDATA[ 15 <command detect_errors="aggressive"><![CDATA[
16 Rscript $__tool_directory__/w4mclassfilter_wrapper.R 16 Rscript $__tool_directory__/w4mclassfilter_wrapper.R
17 dataMatrix_in "$dataMatrix_in" 17 dataMatrix_in '$dataMatrix_in'
18 sampleMetadata_in "$sampleMetadata_in" 18 sampleMetadata_in '$sampleMetadata_in'
19 variableMetadata_in "$variableMetadata_in" 19 variableMetadata_in '$variableMetadata_in'
20 sampleclassNames "$sampleclassNames" 20 sampleclassNames '$sampleclassNames'
21 inclusive "$inclusive" 21 inclusive '$inclusive'
22 classnameColumn "$classnameColumn" 22 wildcards '$wildcards'
23 samplenameColumn "$samplenameColumn" 23 classnameColumn '$classnameColumn'
24 dataMatrix_out "$dataMatrix_out" 24 samplenameColumn '$samplenameColumn'
25 sampleMetadata_out "$sampleMetadata_out" 25 dataMatrix_out '$dataMatrix_out'
26 variableMetadata_out "$variableMetadata_out" 26 sampleMetadata_out '$sampleMetadata_out'
27 variableMetadata_out '$variableMetadata_out'
27 ]]></command> 28 ]]></command>
28 29
29 <inputs> 30 <inputs>
30 <param name="dataMatrix_in" label="Data matrix file" type="data" format="tabular" help="variable x sample, decimal: '.', missing: NA, mode: numerical, separator: tab" /> 31 <param name="dataMatrix_in" label="Data matrix file" type="data" format="tabular" help="variable x sample, decimal: '.', missing: NA, mode: numerical, separator: tab" />
31 <param name="sampleMetadata_in" label="Sample metadata file" type="data" format="tabular" help="sample x metadata columns, separator: tab" /> 32 <param name="sampleMetadata_in" label="Sample metadata file" type="data" format="tabular" help="sample x metadata columns, separator: tab" />
32 <param name="variableMetadata_in" label="Variable metadata file" type="data" format="tabular" help="variable x metadata columns, separator: tab" /> 33 <param name="variableMetadata_in" label="Variable metadata file" type="data" format="tabular" help="variable x metadata columns, separator: tab" />
33 <param name="sampleclassNames" label="Names of sample classes" type="text" value = "" help="comma-separated names of sample classes to filter in or out; defaults to no names" /> 34 <param name="samplenameColumn" label="Column that names the sample" type="text" value = "sampleMetadata" help="name of the column in the sample metadata file that has the name of the sample - defaults to 'sampleMetadata'" />
35 <param name="classnameColumn" label="Column that names the sample-class" type="text" value = "class" help="name of the column in sample metadata that has the values to be tested against the 'classes' input parameter - defaults to 'class'" />
36 <param name="sampleclassNames" label="Names of sample classes" type="text" value = "" help="comma-separated names (or comma-less regular expressions to match names) of sample-classes to filter in or out; defaults to no names">
37 <sanitizer sanitize="False"/>
38 </param>
39 <param name="wildcards" label="Use wild-cards or regular-expressions" type="select" help="wild-cards (the default) - use '*' and '?' to match class names; regular-expressions - use comma-less regular expressions to match class names">
40 <option value="TRUE" selected="true">wild-cards</option>
41 <option value="FALSE">regular-expressions</option>
42 </param>
34 <param name="inclusive" label="Include named classes" type="select" help="filter-in - include only the named sample classes; filter-out (the default) - exclude only the named sample classes"> 43 <param name="inclusive" label="Include named classes" type="select" help="filter-in - include only the named sample classes; filter-out (the default) - exclude only the named sample classes">
35 <option value="TRUE">filter-in</option> 44 <option value="TRUE">filter-in</option>
36 <option value="FALSE" selected="true">filter-out</option> 45 <option value="FALSE" selected="true">filter-out</option>
37 </param> 46 </param>
38 <param name="classnameColumn" label="Column that names the sample-class" type="text" value = "class" help="name of the column in sample metadata that has the values to be tested against the 'classes' input parameter - defaults to 'class'" />
39 <param name="samplenameColumn" label="Column that names the sample" type="text" value = "sampleMetadata" help="name of the column in sample metadata that has the name of the sample - defaults to 'sampleMetadata'" />
40 </inputs> 47 </inputs>
41 <outputs> 48 <outputs>
42 <data name="dataMatrix_out" label="${tool.name}_${dataMatrix_in.name}" format="tabular" ></data> 49 <data name="dataMatrix_out" label="${tool.name}_${dataMatrix_in.name}" format="tabular" ></data>
43 <data name="sampleMetadata_out" label="${tool.name}_${sampleMetadata_in.name}" format="tabular" ></data> 50 <data name="sampleMetadata_out" label="${tool.name}_${sampleMetadata_in.name}" format="tabular" ></data>
44 <data name="variableMetadata_out" label="${tool.name}_${variableMetadata_in.name}" format="tabular" ></data> 51 <data name="variableMetadata_out" label="${tool.name}_${variableMetadata_in.name}" format="tabular" ></data>
154 <has_text text="HMDB00512" /> 161 <has_text text="HMDB00512" />
155 <has_text text="HMDB00251" /> 162 <has_text text="HMDB00251" />
156 </assert_contents> 163 </assert_contents>
157 </output> 164 </output>
158 </test> 165 </test>
166 <test>
167 <param name="dataMatrix_in" value="input_nofilter_dataMatrix.tsv"/>
168 <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
169 <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
170 <param name="classnameColumn" value="gender"/>
171 <param name="sampleclassNames" value="M"/>
172 <param name="samplenameColumn" value="sampleMetadata"/>
173 <param name="inclusive" value="filter-in"/>
174 <output name="variableMetadata_out">
175 <assert_contents>
176 <has_text text="HMDB03193" />
177 <not_has_text text="HMDB00822" />
178 <has_text text="HMDB01101" />
179 <has_text text="HMDB01101.1" />
180 <has_text text="HMDB10348" />
181 <has_text text="HMDB59717" />
182 <not_has_text text="HMDB13189" />
183 <has_text text="HMDB00299" />
184 <has_text text="HMDB00191" />
185 <has_text text="HMDB00518" />
186 <has_text text="HMDB00715" />
187 <has_text text="HMDB01032" />
188 <has_text text="HMDB00208" />
189 <has_text text="HMDB04824" />
190 <has_text text="HMDB00512" />
191 <has_text text="HMDB00251" />
192 </assert_contents>
193 </output>
194 </test>
195 <test>
196 <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
197 <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
198 <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
199 <param name="classnameColumn" value="gender"/>
200 <param name="sampleclassNames" value="[Mm],[fF]"/>
201 <param name="samplenameColumn" value="sampleMetadata"/>
202 <param name="inclusive" value="filter-in"/>
203 <output name="sampleMetadata_out">
204 <assert_contents>
205 <has_text text="HU_028" />
206 <has_text text="HU_051" />
207 <has_text text="HU_060" />
208 <has_text text="HU_110" />
209 <has_text text="HU_149" />
210 <has_text text="HU_152" />
211 <has_text text="HU_175" />
212 <has_text text="HU_178" />
213 <has_text text="HU_185" />
214 <not_has_text text="HU_204" />
215 <has_text text="HU_208" />
216 <has_text text="HU_017" />
217 <has_text text="HU_034" />
218 <has_text text="HU_078" />
219 <has_text text="HU_091" />
220 <has_text text="HU_093" />
221 <has_text text="HU_099" />
222 <has_text text="HU_130" />
223 <has_text text="HU_134" />
224 <has_text text="HU_138" />
225 </assert_contents>
226 </output>
227 </test>
228 <test>
229 <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
230 <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
231 <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
232 <param name="classnameColumn" value=""/>
233 <param name="sampleclassNames" value="M"/>
234 <param name="samplenameColumn" value="sampleMetadata"/>
235 <param name="inclusive" value="filter-in"/>
236 <output name="sampleMetadata_out">
237 <assert_contents>
238 <has_text text="HU_028" />
239 <has_text text="HU_051" />
240 <has_text text="HU_060" />
241 <has_text text="HU_110" />
242 <has_text text="HU_149" />
243 <has_text text="HU_152" />
244 <has_text text="HU_175" />
245 <has_text text="HU_178" />
246 <has_text text="HU_185" />
247 <not_has_text text="HU_204" />
248 <has_text text="HU_208" />
249 <has_text text="HU_017" />
250 <has_text text="HU_034" />
251 <has_text text="HU_078" />
252 <has_text text="HU_091" />
253 <has_text text="HU_093" />
254 <has_text text="HU_099" />
255 <has_text text="HU_130" />
256 <has_text text="HU_134" />
257 <has_text text="HU_138" />
258 </assert_contents>
259 </output>
260 </test>
159 </tests> 261 </tests>
160 262
161 263
162 264
163 <help> 265 <help>
164 <![CDATA[ 266 <![CDATA[
165 267
166 .. class:: infomark
167 268
168 **Author** Arthur Eschenlauer (University of Minnesota, esch0041@umn.edu) 269 **Author** Arthur Eschenlauer (University of Minnesota, esch0041@umn.edu)
169 270
170 -------------------------------------------------------------------------- 271 --------------------------------------------------------------------------
171 272
172 .. class:: infomark
173 273
174 **R package** 274 **R package**
175 275
176 The *w4mclassfilter* package is available from the Hegeman lab github repository (https://github.com/HegemanLab/w4mclassfilter/releases). 276 The *w4mclassfilter* package is available from the Hegeman lab github repository (https://github.com/HegemanLab/w4mclassfilter/releases).
177 277
178 ----------------------------------------------------------------------------------------------------------------------------------------- 278 -----------------------------------------------------------------------------------------------------------------------------------------
179 279
180 .. class:: infomark
181 280
182 **Tool updates** 281 **Tool updates**
183 282
184 See the **NEWS** section at the bottom of this page 283 See the **NEWS** section at the bottom of this page
185 284
197 296
198 ----------------- 297 -----------------
199 Workflow Position 298 Workflow Position
200 ----------------- 299 -----------------
201 300
202 - Upstream tool category: Preprocessing 301 - Upstream tool category: Preprocessing
203 - Downstream tool categories: Normalisation, Statistical Analysis, Quality Control 302 - Downstream tool categories: Normalisation, Statistical Analysis, Quality Control, Filter and Sort
204 303
205 ---------- 304 ----------
206 Motivation 305 Motivation
207 ---------- 306 ----------
208 307
209 GC-MS1 and LC-MS1 experiments seek to resolve chemicals as features that have distinct chromatographic behavior and (after ionization) mass-to-charge ratio. 308 GC-MS1 and LC-MS1 experiments seek to resolve chemicals as features that have distinct chromatographic behavior and (after ionization) mass-to-charge ratio.
210 Data for a sample are collected as MS intensities, each of which is associated with a position on a 2D plane with dimensions of m/z ratio and chromatographic retention time. 309 Data for a sample are collected as MS intensities, each of which is associated with a position on a 2D plane with dimensions of m/z ratio and chromatographic retention time.
211 Ideally, features would be sufficiently reproducible from sample-run to sample-run to identify features that are common among samples and those that differ. 310 Ideally, features would be sufficiently reproducible from sample-run to sample-run to identify features that are common among samples and those that differ.
212 However, the chromatographic retention time for a chemical can vary from one run to another. 311 However, the chromatographic retention time for a chemical can vary from one run to another.
213 In the Workflow4Metabolomics (W4M, [Giacomoni *et al.*, 2014]) "flavor" of Galaxy, the XCMS [Smith *et al.*, 2006] preprocessing tools provide for "retention time correction" to align features among samples, but features may be better aligned if pooled samples and blanks are included. 312 In the Workflow4Metabolomics (W4M, [Giacomoni *et al.*, 2014]) "flavor" of Galaxy, the XCMS [Smith *et al.*, 2006] preprocessing tools provide for "retention time correction" to align features among samples, but features may be better aligned if pooled samples and blanks are included.
214 313
215 Multivariate statistical techniques may be used to discover clusters of similar samples, and sometimes it is desirable to apply clustering iteratively to smaller and smaller subsets of samples until observable separation of clusters is no longer significant. 314 Multivariate statistical techniques may be used to discover clusters of similar samples, and sometimes it is desirable to apply clustering iteratively to smaller and smaller subsets of samples until observable separation of clusters is no longer significant.
216 Once feature-alignment has been achieved among samples in GC-MS and LC-MS datasets, however, the presence of pools and blanks may confound identification and separation of clusters. 315 Once feature-alignment has been achieved among samples in GC-MS and LC-MS datasets, however, the presence of pools and blanks may confound identification and separation of clusters.
221 The tool uses a "sample-class" column in the sample metadata as the basis for including or eliminating samples for further analysis. 320 The tool uses a "sample-class" column in the sample metadata as the basis for including or eliminating samples for further analysis.
222 Class-values to be considered are provided by the user as a comma-separated list. 321 Class-values to be considered are provided by the user as a comma-separated list.
223 The user also provides an indication whether the list specifies classes to be included in further analysis ("filter-in") or rather to be excluded from it ("filter-out"). 322 The user also provides an indication whether the list specifies classes to be included in further analysis ("filter-in") or rather to be excluded from it ("filter-out").
224 Next, missing and negative intensities for features of the remaining samples are imputed to zero. 323 Next, missing and negative intensities for features of the remaining samples are imputed to zero.
225 Finally, samples or features with zero variance are eliminated. 324 Finally, samples or features with zero variance are eliminated.
226
227 325
228 ----------- 326 -----------
229 Input files 327 Input files
230 ----------- 328 -----------
231 329
254 352
255 Variable metadata file 353 Variable metadata file
256 | variable x metadata **variableMetadata** (tabular separated values) file of the numeric and/or character variable metadata, with . as decimal and NA for missing values 354 | variable x metadata **variableMetadata** (tabular separated values) file of the numeric and/or character variable metadata, with . as decimal and NA for missing values
257 | 355 |
258 356
357 Column that names the sample (default = '``sampleMetadata``')
358 | name of the column in sample metadata that has the name of the sample
359 |
360
361 Column that names the sample-class (default = '``class``')
362 | name of the column in sample metadata that has the values to be tested against the '``classes``' input parameter
363 |
364
259 Names of sample classes (default = no names) 365 Names of sample classes (default = no names)
260 | comma-separated names of sample classes to include or exclude 366 | comma-separated names of sample classes to include or exclude
261 | 367 |
262 368
263 Include named classes (default = filter-out) 369 Wild-cards (default = '``wild-cards``')
264 | *filter-in* - include only the named sample classes 370 | '``wild-cards``' - use wild-cards to match names of sample classes (see 'Wild card patterns to match class-names' below)
265 | *filter-out* - exclude only the named sample classes 371 | '``regular-expressions``' - use regular expressions to match names of sample classes (see 'Regular expression patterns to match class-names' below)
266 | 372 |
267 373
268 374 Include named classes (default = '``filter-out``')
269 Column that names the sample-class (default = 'class') 375 | '``filter-in``' - include only the named sample classes
270 | name of the column in sample metadata that has the values to be tested against the 'classes' input parameter 376 | '``filter-out``' - exclude only the named sample classes
271 | 377 |
272 378
273 Column that names the sample (default = 'sampleMetadata')
274 | name of the column in sample metadata that has the name of the sample
275 |
276 379
277 380
278 ------------ 381 ------------
279 Output files 382 Output files
280 ------------ 383 ------------
289 | 392 |
290 393
291 dataMatrix 394 dataMatrix
292 | (tabular separated values) file identical to the **dataMatrix** file given as an input argument, except that it lacks rows for variables (xC-MS features) that have been filtered out (because of zero variance) and columns that have been filtered out (by the sample-class filter or because of zero variance) 395 | (tabular separated values) file identical to the **dataMatrix** file given as an input argument, except that it lacks rows for variables (xC-MS features) that have been filtered out (because of zero variance) and columns that have been filtered out (by the sample-class filter or because of zero variance)
293 | 396 |
397
398
399 ---------------------------------------
400 Wild card patterns to match class-names
401 ---------------------------------------
402
403 Beginning with v0.98.2, w4mclassfilter supports use of wild card patterns to select class-names.
404
405 - use '``?``' to match a single character
406 - use '``*``' to match zero or more characters
407 - the pattern must match the entire class-name
408
409 For example
410
411 - '``??.samp*``' matches '``my.sample``' but not '``my.own.sample``'
412 - '``*.sample``' matches '``my.sample``' and '``my.own.sample``'
413 - '``*.sampl``' matches neither '``my.sample``' nor '``my.own.sample``'
414
415 ------------------------------------------------
416 Regular expression patterns to match class-names
417 ------------------------------------------------
418
419 Beginning with v0.98.2, w4mclassfilter supports use of R regular expression patterns to select class-names.
420
421 R uses POSIX 1003.2 standard regular expressions, which allow precise pattern-matching and are exhaustively defined at:
422 http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html
423
424 However, only a few basic building blocks of regular expressions need to be mastered for most cases:
425
426 - '``^``' matches the beginning of a class-name
427 - '``$``' matches the end of a class-name
428 - '``.``' outside of square brackets matches a single character
429 - '``*``' matches the character specified immediately before it, zero or more times
430 - square brackets specify a set of characters to be matched.
431
432 Within square brackets
433
434 - '``^``' as the first character specifies that the list of characters are those that should **not** be matched.
435 - '``-``' is used to specify ranges of characters
436
437 Caveat: The tool wrapper uses the comma ('``,``') to split a list of sample-class names, so **commas may not be used within regular expressions for this tool**
438
439 First Example: Consider a field of class-names consisting of '``marq3,marq6,marq9,marq12,front3,front6,front9,front12``'
440
441 - The regular expression '``^front[0-9][0-9]*$``' will match the same sample-classes as '``front3,front6,front9,front12``'
442 - The regular expression '``^[a-z][a-z]*3$``' will match the same sample-classes as '``front3,marq3``'
443 - The regular expression '``^[a-z][a-z]*12$``' will match the same sample-classes as '``front12,marq12``'
444 - The regular expression '``^[a-z][a-z]*[0-9]$``' will match the same sample-classes as '``front3,front6,front9,marq3,marq6,marq9``'
445
446 Second Example: Consider these regular expression patterns as possible matches to a sample-class name '``AB0123``':
447
448 - '``^[A-Z][A-Z][0-9][0-9]*$``' - MATCHES '``**^AB0123$**``'
449 - '``^[A-Z][A-Z]*[0-9][0-9]*$``' - MATCHES '``**^AB0123$**``'
450 - '``^[A-Z][0-9]*``' - MATCHES '``**^A** B0123$``' - first character is a letter, '``*``' can specify zero characters, and end of line did not need to be matched.
451 - '``^[A-Z][A-Z][0-9]``' - MATCHES '``**^AB0** 123$``' - first two characters are letters and the third is a digit.
452 - '``^[A-Z][A-Z]*[0-9][0-9]$``' - NO MATCH - the name does not end with the pattern '``[A-Z][0-9][0-9]$``', i.e., it ends with four digits, not two.
453 - '``^[A-Z][0-9]*$``' - NO MATCH - the pattern specifies that second character and all those that follow, if present, must be digits.
294 454
295 455
296 --------------- 456 ---------------
297 Working example 457 Working example
298 --------------- 458 ---------------
346 506
347 ---- 507 ----
348 NEWS 508 NEWS
349 ---- 509 ----
350 510
511 CHANGES IN VERSION 0.98.2
512 =========================
513
514 NEW FEATURES
515
516 * Added support for R-flavored regular expression pattern-matching when selecting names of sample-classes.
517 * Empty classes argument or zero-length class_column results in no samples being filtered out.
518
519 INTERNAL MODIFICATIONS
520
521 * Support and tests for new features.
522
523
351 CHANGES IN VERSION 0.98.1 524 CHANGES IN VERSION 0.98.1
352 ========================= 525 =========================
353 526
354 NEW FEATURES 527 NEW FEATURES
355 528