<!-- Mercurial > repos > eschen42 > w4mclassfilter -->

<tool id="w4mclassfilter" name="Sample_Subset" version="0.98.7">
  <!-- this file is utf-8, not ASCII, because it contains the character é -->
  <description>Filter W4M data by sample class</description>

  <requirements>
    <!-- Packages that must be installed for the tool to run. -->
    <!-- Disabled dependency, kept for reference: -->
    <!-- <requirement type="package" version="6.2">readline</requirement> -->
    <!-- R interpreter; the command section launches the wrapper through Rscript. -->
    <requirement type="package" version="3.4.1">r-base</requirement>
    <!-- NOTE(review): presumably used by the wrapper script to parse its NAME VALUE arguments; confirm against w4mclassfilter_wrapper.R -->
    <requirement type="package" version="1.1_4">r-batch</requirement>
    <!-- The w4mclassfilter R package itself; the pinned version equals the tool's own version attribute. -->
    <requirement type="package" version="0.98.7">w4mclassfilter</requirement>
  </requirements>

  <stdio>
    <!-- Any exit code of 1 or higher marks the job as failed. -->
    <exit_code range="1:" level="fatal" />
  </stdio>


  <!--
    Launch the R wrapper script that ships in the tool directory.
    Arguments are passed as alternating NAME 'VALUE' pairs: the three input
    tables, the filtering options, and the three output paths.
    Every substituted value, including the script path, is single-quoted so
    that paths or values containing spaces or shell metacharacters reach
    Rscript as a single argument.
  -->
  <command detect_errors="aggressive"><![CDATA[
  Rscript '$__tool_directory__/w4mclassfilter_wrapper.R'
  dataMatrix_in '$dataMatrix_in'
  sampleMetadata_in '$sampleMetadata_in'
  variableMetadata_in '$variableMetadata_in'
  sampleclassNames '$sampleclassNames'
  inclusive '$inclusive'
  wildcards '$wildcards'
  classnameColumn '$classnameColumn'
  samplenameColumn '$samplenameColumn'
  variable_range_filter '$variableRangeFilter'
  dataMatrix_out '$dataMatrix_out'
  sampleMetadata_out '$sampleMetadata_out'
  variableMetadata_out '$variableMetadata_out'
  ]]></command>

  <inputs>
    <!-- The three W4M tables to be filtered together. -->
    <param name="dataMatrix_in" label="Data matrix file" type="data" format="tabular" help="variable x sample, decimal: '.', missing: NA, mode: numerical, separator: tab" />
    <param name="sampleMetadata_in" label="Sample metadata file" type="data" format="tabular" help="sample x metadata columns, separator: tab" />
    <param name="variableMetadata_in" label="Variable metadata file" type="data" format="tabular" help="variable x metadata columns, separator: tab" />

    <!-- Names of the sample metadata columns that hold the sample name and the sample class. -->
    <param name="samplenameColumn" label="Column that names the sample" type="text" value="sampleMetadata" help="name of the column in the sample metadata file that has the name of the sample - defaults to 'sampleMetadata'" />
    <param name="classnameColumn" label="Column that names the sample-class" type="text" value="class" help="name of the column in sample metadata that has the values to be tested against the 'classes' input parameter - defaults to 'class'" />

    <!--
      Class names or patterns to filter on.
      The sanitizer whitelists letters, digits, and the punctuation needed to
      write wild-card and regular-expression patterns; characters that are not
      whitelisted do not reach the wrapper unchanged.  The hyphen is
      whitelisted because the help text documents bracket ranges such as
      [0-9] and [a-z]; the underscore is whitelisted so that names containing
      it can be typed literally.
    -->
    <param name="sampleclassNames" label="Names of sample classes" type="text" value="" help="comma-separated names (or comma-less regular expressions to match names) of sample-classes to filter in or out; defaults to no names">
      <sanitizer>
        <valid initial="string.letters">
          <add preset="string.digits"/>
          <add value="&#36;"  /> <!-- dollar, dollar-sign -->
          <add value="&#40;"  /> <!-- left-paren -->
          <add value="&#41;"  /> <!-- right-paren -->
          <add value="&#42;"  /> <!-- splat, asterisk -->
          <add value="&#43;"  /> <!-- plus -->
          <add value="&#44;"  /> <!-- comma -->
          <add value="&#45;"  /> <!-- dash, hyphen (bracket ranges such as [0-9]) -->
          <add value="&#46;"  /> <!-- dot, period -->
          <add value="&#58;"  /> <!-- colon -->
          <add value="&#59;"  /> <!-- semi, semicolon -->
          <add value="&#63;"  /> <!-- what, question mark -->
          <add value="&#91;"  /> <!-- l-squib, left-square-bracket -->
          <add value="&#92;"  /> <!-- whack, backslash -->
          <add value="&#93;"  /> <!-- r-squib, right-square-bracket -->
          <add value="&#94;"  /> <!-- hat, caret -->
          <add value="&#95;"  /> <!-- underscore -->
          <add value="&#123;" /> <!-- l-cube, left-curly-bracket -->
          <add value="&#124;" /> <!-- pipe -->
          <add value="&#125;" /> <!-- r-cube, right-curly-bracket -->
        </valid>
      </sanitizer>
    </param>

    <!-- How the class names above are interpreted; the option values are the strings handed to the wrapper. -->
    <param name="wildcards" label="Use wild-cards or regular-expressions" type="select" help="wild-cards (the default) - use '*' and '?' to match class names; regular-expressions - use comma-less regular expressions to match class names">
      <option value="TRUE" selected="true">wild-cards</option>
      <option value="FALSE">regular-expressions</option>
    </param>
    <!-- Whether matching classes are kept (filter-in) or dropped (filter-out). -->
    <param name="inclusive" label="Include named classes" type="select" help="filter-in - include only the named sample classes; filter-out (the default) - exclude only the named sample classes">
      <option value="TRUE">filter-in</option>
      <option value="FALSE" selected="true">filter-out</option>
    </param>

    <!--
      Optional feature filters of the form columnName:min:max.
      Hyphen and underscore are whitelisted in addition to the separators so
      that signed or exponent-form bounds (for example 1e-3) and column names
      containing an underscore are passed through unaltered.
    -->
    <param name="variableRangeFilter" label="Variable range-filters" type="text" value="" help="comma-separated filters, each specified as 'variableMetadataColumnName:min:max'; default is no filters.  (See help below.)">
      <sanitizer>
        <valid initial="string.letters">
          <add preset="string.digits"/>
          <add value="&#44;"  /> <!-- comma -->
          <add value="&#45;"  /> <!-- dash, minus sign -->
          <add value="&#46;"  /> <!-- dot, period -->
          <add value="&#58;"  /> <!-- colon -->
          <add value="&#95;"  /> <!-- underscore -->
        </valid>
      </sanitizer>
    </param>

  </inputs>
  <outputs>
    <!-- One tabular output per input table; each label joins the tool name and the source dataset's name with an underscore. -->
    <data name="dataMatrix_out" format="tabular" label="${tool.name}_${dataMatrix_in.name}"/>
    <data name="sampleMetadata_out" format="tabular" label="${tool.name}_${sampleMetadata_in.name}"/>
    <data name="variableMetadata_out" format="tabular" label="${tool.name}_${variableMetadata_in.name}"/>
  </outputs>

  <tests>
    <!--
      Range filtering only: no class names are given (filter-out mode), and
      three variable-range filters are applied.  Checks which samples remain
      in the sample metadata and which features remain in the variable
      metadata.  Select parameters are set by option value (TRUE/FALSE).
    -->
    <test>
      <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
      <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
      <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
      <param name="classnameColumn" value="class"/>
      <param name="sampleclassNames" value=""/>
      <param name="wildcards" value="FALSE"/>
      <param name="samplenameColumn" value="sampleMetadata"/>
      <param name="inclusive" value="FALSE"/>
      <param name="variableRangeFilter" value="FEATMAX:2e6:,mz:200:,rt::800"/>
      <output name="sampleMetadata_out">
        <assert_contents>
          <has_text text="HU_028" />
          <has_text text="HU_051" />
          <has_text text="HU_060" />
          <has_text text="HU_110" />
          <has_text text="HU_149" />
          <has_text text="HU_152" />
          <has_text text="HU_175" />
          <has_text text="HU_178" />
          <has_text text="HU_185" />
          <not_has_text text="HU_204" />
          <has_text text="HU_208" />
          <has_text text="HU_017" />
          <has_text text="HU_034" />
          <has_text text="HU_078" />
          <has_text text="HU_091" />
          <has_text text="HU_093" />
          <has_text text="HU_099" />
          <has_text text="HU_130" />
          <has_text text="HU_134" />
          <has_text text="HU_138" />
        </assert_contents>
      </output>
      <output name="variableMetadata_out">
        <assert_contents>
          <has_text     text="HMDB00191" />
          <has_text     text="HMDB00208" />
          <not_has_text text="HMDB00251" />
          <not_has_text text="HMDB00299" />
          <not_has_text text="HMDB00512" />
          <not_has_text text="HMDB00518" />
          <not_has_text text="HMDB00715" />
          <not_has_text text="HMDB00822" />
          <has_text     text="HMDB01032" />
          <has_text     text="HMDB01101.1" />
          <not_has_text text="HMDB03193" />
          <not_has_text text="HMDB04824" />
          <not_has_text text="HMDB10348" />
          <has_text     text="HMDB13189" />
          <not_has_text text="HMDB59717" />
        </assert_contents>
      </output>
    </test>
    <!--
      Filter-in on gender "M" with the pattern mode left at its default.
      Checks the data matrix output: which sample columns and which feature
      rows are present.  The select parameter is set by option value.
    -->
    <test>
      <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
      <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
      <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
      <param name="classnameColumn" value="gender"/>
      <param name="sampleclassNames" value="M"/>
      <param name="samplenameColumn" value="sampleMetadata"/>
      <param name="inclusive" value="TRUE"/>
      <output name="dataMatrix_out">
        <assert_contents>
          <not_has_text text="HU_028" />
          <not_has_text text="HU_051" />
          <not_has_text text="HU_060" />
          <not_has_text text="HU_110" />
          <not_has_text text="HU_149" />
          <not_has_text text="HU_152" />
          <not_has_text text="HU_175" />
          <not_has_text text="HU_178" />
          <not_has_text text="HU_185" />
          <not_has_text text="HU_204" />
          <not_has_text text="HU_208" />
          <has_text     text="HU_017" />
          <has_text     text="HU_034" />
          <has_text     text="HU_078" />
          <has_text     text="HU_091" />
          <has_text     text="HU_093" />
          <has_text     text="HU_099" />
          <has_text     text="HU_130" />
          <has_text     text="HU_134" />
          <has_text     text="HU_138" />
          <has_text     text="HMDB03193" />
          <not_has_text text="HMDB00822" />
          <has_text     text="HMDB01101" />
          <has_text     text="HMDB01101.1" />
          <has_text     text="HMDB10348" />
          <has_text     text="HMDB59717" />
          <has_text     text="HMDB13189" />
          <has_text     text="HMDB00299" />
          <has_text     text="HMDB00191" />
          <has_text     text="HMDB00518" />
          <has_text     text="HMDB00715" />
          <has_text     text="HMDB01032" />
          <has_text     text="HMDB00208" />
          <has_text     text="HMDB04824" />
          <has_text     text="HMDB00512" />
          <has_text     text="HMDB00251" />
        </assert_contents>
      </output>
    </test>
    <!--
      Wild-card mode with the pattern "*" on the gender column, filter-in.
      Checks which samples remain in the sample metadata output.
      Select parameters are set by option value.
    -->
    <test>
      <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
      <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
      <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
      <param name="classnameColumn" value="gender"/>
      <param name="sampleclassNames" value="*"/>
      <param name="wildcards" value="TRUE"/>
      <param name="samplenameColumn" value="sampleMetadata"/>
      <param name="inclusive" value="TRUE"/>
      <output name="sampleMetadata_out">
        <assert_contents>
          <not_has_text text="HU_204" />
          <has_text text="HU_028" />
          <has_text text="HU_051" />
          <has_text text="HU_060" />
          <has_text text="HU_110" />
          <has_text text="HU_149" />
          <has_text text="HU_152" />
          <has_text text="HU_175" />
          <has_text text="HU_178" />
          <has_text text="HU_185" />
          <has_text text="HU_208" />
          <has_text text="HU_017" />
          <has_text text="HU_034" />
          <has_text text="HU_078" />
          <has_text text="HU_091" />
          <has_text text="HU_093" />
          <has_text text="HU_099" />
          <has_text text="HU_130" />
          <has_text text="HU_134" />
          <has_text text="HU_138" />
        </assert_contents>
      </output>
    </test>
    <!--
      Regular-expression mode, filter-in on gender "M".
      Checks which samples remain in the sample metadata output.
      Select parameters are set by option value.
    -->
    <test>
      <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
      <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
      <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
      <param name="classnameColumn" value="gender"/>
      <param name="sampleclassNames" value="M"/>
      <param name="wildcards" value="FALSE"/>
      <param name="samplenameColumn" value="sampleMetadata"/>
      <param name="inclusive" value="TRUE"/>
      <output name="sampleMetadata_out">
        <assert_contents>
          <not_has_text text="HU_028" />
          <not_has_text text="HU_051" />
          <not_has_text text="HU_060" />
          <not_has_text text="HU_110" />
          <not_has_text text="HU_149" />
          <not_has_text text="HU_152" />
          <not_has_text text="HU_175" />
          <not_has_text text="HU_178" />
          <not_has_text text="HU_185" />
          <not_has_text text="HU_204" />
          <not_has_text text="HU_208" />
          <has_text     text="HU_017" />
          <has_text     text="HU_034" />
          <has_text     text="HU_078" />
          <has_text     text="HU_091" />
          <has_text     text="HU_093" />
          <has_text     text="HU_099" />
          <has_text     text="HU_130" />
          <has_text     text="HU_134" />
          <has_text     text="HU_138" />
        </assert_contents>
      </output>
    </test>
    <!--
      Regular-expression mode, filter-in on gender "M".
      Checks which features remain in the variable metadata output.
      Select parameters are set by option value.
    -->
    <test>
      <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
      <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
      <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
      <param name="classnameColumn" value="gender"/>
      <param name="sampleclassNames" value="M"/>
      <param name="wildcards" value="FALSE"/>
      <param name="samplenameColumn" value="sampleMetadata"/>
      <param name="inclusive" value="TRUE"/>
      <output name="variableMetadata_out">
        <assert_contents>
          <has_text     text="HMDB03193" />
          <not_has_text text="HMDB00822" />
          <has_text     text="HMDB01101" />
          <has_text     text="HMDB01101.1" />
          <has_text     text="HMDB10348" />
          <has_text     text="HMDB59717" />
          <has_text     text="HMDB13189" />
          <has_text     text="HMDB00299" />
          <has_text     text="HMDB00191" />
          <has_text     text="HMDB00518" />
          <has_text     text="HMDB00715" />
          <has_text     text="HMDB01032" />
          <has_text     text="HMDB00208" />
          <has_text     text="HMDB04824" />
          <has_text     text="HMDB00512" />
          <has_text     text="HMDB00251" />
        </assert_contents>
      </output>
    </test>
    <!--
      Same options as the gender "M" regular-expression tests, but with the
      alternative data matrix input_nofilter_dataMatrix.tsv.  Checks the
      variable metadata output, where HMDB13189 is expected to be absent.
      Select parameters are set by option value.
    -->
    <test>
      <param name="dataMatrix_in" value="input_nofilter_dataMatrix.tsv"/>
      <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
      <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
      <param name="classnameColumn" value="gender"/>
      <param name="sampleclassNames" value="M"/>
      <param name="wildcards" value="FALSE"/>
      <param name="samplenameColumn" value="sampleMetadata"/>
      <param name="inclusive" value="TRUE"/>
      <output name="variableMetadata_out">
        <assert_contents>
          <has_text     text="HMDB03193" />
          <not_has_text text="HMDB00822" />
          <has_text     text="HMDB01101" />
          <has_text     text="HMDB01101.1" />
          <has_text     text="HMDB10348" />
          <has_text     text="HMDB59717" />
          <not_has_text text="HMDB13189" />
          <has_text     text="HMDB00299" />
          <has_text     text="HMDB00191" />
          <has_text     text="HMDB00518" />
          <has_text     text="HMDB00715" />
          <has_text     text="HMDB01032" />
          <has_text     text="HMDB00208" />
          <has_text     text="HMDB04824" />
          <has_text     text="HMDB00512" />
          <has_text     text="HMDB00251" />
        </assert_contents>
      </output>
    </test>
    <!--
      Regular-expression mode with a comma-separated list of two bracket
      expressions, "[Mm],[fF]", filter-in on the gender column.
      Checks which samples remain in the sample metadata output.
      Select parameters are set by option value.
    -->
    <test>
      <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
      <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
      <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
      <param name="classnameColumn" value="gender"/>
      <param name="sampleclassNames" value="[Mm],[fF]"/>
      <param name="wildcards" value="FALSE"/>
      <param name="samplenameColumn" value="sampleMetadata"/>
      <param name="inclusive" value="TRUE"/>
      <output name="sampleMetadata_out">
        <assert_contents>
          <has_text text="HU_028" />
          <has_text text="HU_051" />
          <has_text text="HU_060" />
          <has_text text="HU_110" />
          <has_text text="HU_149" />
          <has_text text="HU_152" />
          <has_text text="HU_175" />
          <has_text text="HU_178" />
          <has_text text="HU_185" />
          <not_has_text text="HU_204" />
          <has_text text="HU_208" />
          <has_text text="HU_017" />
          <has_text text="HU_034" />
          <has_text text="HU_078" />
          <has_text text="HU_091" />
          <has_text text="HU_093" />
          <has_text text="HU_099" />
          <has_text text="HU_130" />
          <has_text text="HU_134" />
          <has_text text="HU_138" />
        </assert_contents>
      </output>
    </test>
    <!--
      Empty class-name column with a class name of "M" in filter-in mode.
      Checks which samples remain in the sample metadata output; every sample
      except HU_204 is expected to be present.
      Select parameters are set by option value.
    -->
    <test>
      <param name="dataMatrix_in" value="input_dataMatrix.tsv"/>
      <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/>
      <param name="variableMetadata_in" value="input_variableMetadata.tsv"/>
      <param name="classnameColumn" value=""/>
      <param name="sampleclassNames" value="M"/>
      <param name="wildcards" value="FALSE"/>
      <param name="samplenameColumn" value="sampleMetadata"/>
      <param name="inclusive" value="TRUE"/>
      <output name="sampleMetadata_out">
        <assert_contents>
          <has_text text="HU_028" />
          <has_text text="HU_051" />
          <has_text text="HU_060" />
          <has_text text="HU_110" />
          <has_text text="HU_149" />
          <has_text text="HU_152" />
          <has_text text="HU_175" />
          <has_text text="HU_178" />
          <has_text text="HU_185" />
          <not_has_text text="HU_204" />
          <has_text text="HU_208" />
          <has_text text="HU_017" />
          <has_text text="HU_034" />
          <has_text text="HU_078" />
          <has_text text="HU_091" />
          <has_text text="HU_093" />
          <has_text text="HU_099" />
          <has_text text="HU_130" />
          <has_text text="HU_134" />
          <has_text text="HU_138" />
        </assert_contents>
      </output>
    </test>
  </tests>


  <help><![CDATA[


**Author** Arthur Eschenlauer (University of Minnesota, esch0041@umn.edu)

--------------------------------------------------------------------------


**R package**

The *w4mclassfilter* package is available from the Hegeman lab github repository (https://github.com/HegemanLab/w4mclassfilter/releases).

-----------------------------------------------------------------------------------------------------------------------------------------


**Tool updates**

See the **NEWS** section at the bottom of this page

---------------------------------------------------

==============================================
Filter Workflow4Metabolomics data matrix files
==============================================

-----------
Description
-----------

Filter a set of retention-corrected W4M files (dataMatrix, sampleMetadata, variableMetadata) by sample-class

-----------------
Workflow Position
-----------------

- Upstream tool category: Preprocessing
- Downstream tool categories: Normalisation, Statistical Analysis, Quality Control, Filter and Sort

----------
Motivation
----------

GC-MS1 and LC-MS1 experiments seek to resolve chemicals as features that have distinct chromatographic behavior and (after ionization) mass-to-charge ratio.
Data for a sample are collected as MS intensities, each of which is associated with a position on a 2D plane with dimensions of m/z ratio and chromatographic retention time.
Ideally, features would be sufficiently reproducible from sample-run to sample-run to identify features that are common among samples and those that differ.
However, the chromatographic retention time for a chemical can vary from one run to another.
In the Workflow4Metabolomics (W4M, [Giacomoni *et al.*, 2014, Guitton *et al.* 2017]) "flavor" of Galaxy, the XCMS [Smith *et al.*, 2006] preprocessing tools provide for "retention time correction" to align features among samples, but features may be better aligned if pooled samples and blanks are included.

Multivariate statistical techniques may be used to discover clusters of similar samples, and sometimes it is desirable to apply clustering iteratively to smaller and smaller subsets of samples until observable separation of clusters is no longer significant.
Once feature-alignment has been achieved among samples in GC-MS and LC-MS datasets, however, the presence of pools and blanks may confound identification and separation of clusters.
Multivariate statistical algorithms also may be impacted by missing values or dimensions that have zero variance (Thévenot *et al.*, 2015).

The w4mclassfilter tool provides a way to choose subsets of samples for further analysis.
The tool takes as input the data matrix, sample metadata, and variable metadata Galaxy datasets produced by W4M and produces the same trio of datasets with data only for the selected samples.
The tool uses a "sample-class" column in the sample metadata as the basis for including or eliminating samples for further analysis.
Class-values to be considered are provided by the user as a comma-separated list.
The user also provides an indication whether the list specifies classes to be included in further analysis ("filter-in") or rather to be excluded from it ("filter-out").
Next, missing and negative intensities for features of the remaining samples are imputed to zero.
Finally, samples or features with zero variance are eliminated.

-----------
Input files
-----------

+---------------------------+------------+
| File                      |   Format   |
+===========================+============+
|     Data matrix           |   tabular  |
+---------------------------+------------+
|     Sample metadata       |   tabular  |
+---------------------------+------------+
|     Variable metadata     |   tabular  |
+---------------------------+------------+


----------
Parameters
----------

Data matrix file
	| variable x sample **dataMatrix** (tabular separated values) file of the numeric data matrix, with . as decimal, and NA for missing values; the table must not contain metadata apart from row and column names; the row and column names must be identical to the rownames of the sample and variable metadata, respectively (see below)
	|

Sample metadata file
	| sample x metadata **sampleMetadata** (tabular separated values) file of the numeric and/or character sample metadata, with . as decimal and NA for missing values
	|

Variable metadata file
	| variable x metadata **variableMetadata** (tabular separated values) file of the numeric and/or character variable metadata, with . as decimal and NA for missing values
	|

Column that names the sample (default = '``sampleMetadata``')
	| name of the column in sample metadata that has the name of the sample
	|

Column that names the sample-class (default = '``class``')
	| name of the column in sample metadata that has the values to be tested against the '``classes``' input parameter
	|

Names of sample classes (default = no names)
	| comma-separated names of sample classes to include or exclude
	|

Wild-cards (default = '``wild-cards``')
	| '``wild-cards``' - use wild-cards to match names of sample classes (see 'Wild card patterns to match class-names' below)
	| '``regular-expressions``' - use regular expressions to match names of sample classes (see 'Regular expression patterns to match class-names' below)
	|

Include named classes (default = '``filter-out``')
	| '``filter-in``' - include only the named sample classes
	| '``filter-out``' - exclude only the named sample classes
	|

Variable-range filters (default = no filters)
	| comma-separated variable-range filters, each of the form 'variableMetadataColumnName:min:max' (see 'Variable-range filters' below)
	|


------------
Output files
------------


sampleMetadata
	| (tab-separated values) file identical to the **sampleMetadata** file given as an input argument, except that it lacks rows for samples that have been filtered out (by the sample-class filter or because of zero variance)
	|

variableMetadata
	| (tab-separated values) file identical to the **variableMetadata** file given as an input argument, except that it lacks rows for variables (xC-MS features) that have been filtered out (by a variable-range filter or because of zero variance)
	|

dataMatrix
	| (tab-separated values) file identical to the **dataMatrix** file given as an input argument, except that it lacks rows for variables (xC-MS features) that have been filtered out (by a variable-range filter or because of zero variance) and columns for samples that have been filtered out (by the sample-class filter or because of zero variance)
	|


---------------------------------------
Wild card patterns to match class-names
---------------------------------------

Beginning with v0.98.2, w4mclassfilter supports use of R "wild card" patterns to select class-names.

- use '``?``' to match a single character
- use '``*``' to match zero or more characters
- the pattern must match the entire class-name

For example

- '``??.samp*``' matches '``my.sample``' but not '``my.own.sample``'
- '``*.sample``' matches '``my.sample``' and '``my.own.sample``'
- '``*.sampl``' matches neither '``my.sample``' nor '``my.own.sample``'

------------------------------------------------
Regular expression patterns to match class-names
------------------------------------------------

Beginning with v0.98.2, w4mclassfilter supports use of R "regular expression" patterns to select class-names.

R uses POSIX 1003.2 standard regular expressions, which allow precise pattern-matching and are exhaustively defined at:
http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html

However, only a few basic building blocks of regular expressions need to be mastered for most cases:

- '``^``' matches the beginning of a class-name
- '``$``' matches the end of a class-name
- '``.``' outside of square brackets matches a single character
- '``*``' matches the character specified immediately before it, zero or more times
- square brackets specify a set of characters to be matched.

Within square brackets

- '``^``' as the first character specifies that the list of characters are those that should **not** be matched.
- '``-``' is used to specify ranges of characters

Caveat: The tool wrapper uses the comma ('``,``') to split a list of sample-class names, so **commas may not be used within regular expressions for this tool**

First Example: Consider a field of class-names consisting of '``marq3,marq6,marq9,marq12,front3,front6,front9,front12``'

- The regular expression '``^front[0-9][0-9]*$``' will match the same sample-classes as '``front3,front6,front9,front12``'
- The regular expression '``^[a-z][a-z]3$``' will match the same sample-classes as '``front3,marq3``'
- The regular expression '``^[a-z][a-z]12$``' will match the same sample-classes as '``front12,marq12``'
- The regular expression '``^[a-z][a-z][0-9]$``' will match the same sample-classes as '``front3,front6,front9,marq3,marq6,marq9``'

Second Example: Consider these regular expression patterns as possible matches to a sample-class name '``AB0123``':

- '``^[A-Z][A-Z][0-9][0-9]*$``' - MATCHES '``**^AB0123$**``'
- '``^[A-Z][A-Z]*[0-9][0-9]*$``' - MATCHES '``**^AB0123$**``'
- '``^[A-Z][0-9]*``' - MATCHES  '``**^A** B0123$``' - first character is a letter, '``*``' can specify zero characters, and end of line did not need to be matched.
- '``^[A-Z][A-Z][0-9]``' - MATCHES  '``**^AB0** 123$``' - first two characters are letters and the third is a digit.
- '``^[A-Z][A-Z]*[0-9][0-9]$``' - NO MATCH - the name does not end with the pattern '``[A-Z][0-9][0-9]$``', i.e., it ends with four digits, not two.
- '``^[A-Z][0-9]*$``' - NO MATCH - the pattern specifies that second character and all those that follow, if present, must be digits.

----------------------
Variable-range filters
----------------------

An array of range-specification strings may be supplied in the `variableRangeFilter`
argument.  If supplied, only features having numerical values in the specified column
of `variableMetadata` that fall within the specified ranges will be retained
in the output.  Each range is a string of three colon-separated values (e.g., "mz:200:800") in the
following order:

- the **name of a column of `variableMetadata`** which must have numerical data (e.g., "mz");
- the **minimum allowed value** in that column for the feature to be retained (e.g., 200);
- the **maximum allowed value** (e.g., 800).

Note for the range specification strings:

- **If the "maximum" is less than the "minimum", then the range is exclusive**  (e.g., "mz:800:200" means retain only features whose mz is NOT in the range 200-800)
- **If the name supplied in the first field is 'FEATMAX',**  then the string is defining the minimum (and possibly, though less useful, maximum) intensity for each feature in the dataMatrix.  For example, "FEATMAX:1e6:" would specify  that any feature would be excluded if no sample had an intensity for that feature greater than 1000000.

  - Note, however, that when the "maximum" is greater than the "minimum" for the FEATMAX range specification, then the specification is ignored.

-----------------------------------------------------------------------------

----------------
WORKING EXAMPLES
----------------

.. class:: infomark

-----------
Input Files
-----------

+------------------------------------------------------------------------------------------------------------------------+
| Input File URL                                                                                                         |
+========================================================================================================================+
| https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/test-data/input_dataMatrix.tsv       |
+------------------------------------------------------------------------------------------------------------------------+
| https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/test-data/input_sampleMetadata.tsv   |
+------------------------------------------------------------------------------------------------------------------------+
| https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/test-data/input_variableMetadata.tsv |
+------------------------------------------------------------------------------------------------------------------------+

.. class:: infomark

-------------------------------
Running Without Range-Filtering
-------------------------------

This example retains only samples whose 'gender' attribute is 'M'.

**Input parameters**

+------------------------------------+-----------------+
| Input Parameter                    | Value           |
+====================================+=================+
| Names of sample classes            | M               |
+------------------------------------+-----------------+
| Include named classes              | filter-in       |
+------------------------------------+-----------------+
| Column that names the sample-class | gender          |
+------------------------------------+-----------------+
| Column that names the sample       | sampleMetadata  |
+------------------------------------+-----------------+

**Expected outputs**

+-------------------+---------------------------------------------------------------------------------------------------------------------------+
| Expected Output   | Download from URL                                                                                                         |
+===================+===========================================================================================================================+
| Data matrix       | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/test-data/expected_dataMatrix.tsv       |
+-------------------+---------------------------------------------------------------------------------------------------------------------------+
| Sample metadata   | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/test-data/expected_sampleMetadata.tsv   |
+-------------------+---------------------------------------------------------------------------------------------------------------------------+
| Variable metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/test-data/expected_variableMetadata.tsv |
+-------------------+---------------------------------------------------------------------------------------------------------------------------+

.. class:: infomark

----------------------------
Running With Range-Filtering
----------------------------

This example retains only features whose mz is greater than 200, whose rt is less than 800, and whose maximum intensity across all samples is at least 2,000,000.
This example retains all samples (except those having zero variance for all features), although it would be possible to filter on samples as well.

**Input parameters**

+------------------------------------+-------------------------------+
| Input Parameter                    | Value                         |
+====================================+===============================+
| Names of sample classes            | (Leave this field empty.)     |
+------------------------------------+-------------------------------+
| Include named classes              | filter-out                    |
+------------------------------------+-------------------------------+
| Column that names the sample-class | class                         |
+------------------------------------+-------------------------------+
| Column that names the sample       | sampleMetadata                |
+------------------------------------+-------------------------------+
| Variable range-filters             | FEATMAX:2e6:,mz:200:,rt::800  |
+------------------------------------+-------------------------------+

**Expected outputs**

+-------------------+------------------------------------------------------------------------------------------------------------------------------+
| Expected Output   | Download from URL                                                                                                            |
+===================+==============================================================================================================================+
| Data matrix       | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/test-data/rangefilter_dataMatrix.tsv       |
+-------------------+------------------------------------------------------------------------------------------------------------------------------+
| Sample metadata   | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/test-data/rangefilter_sampleMetadata.tsv   |
+-------------------+------------------------------------------------------------------------------------------------------------------------------+
| Variable metadata | https://raw.githubusercontent.com/HegemanLab/w4mclassfilter_galaxy_wrapper/master/test-data/rangefilter_variableMetadata.tsv |
+-------------------+------------------------------------------------------------------------------------------------------------------------------+

-----------------------------------------------------------------------------

----
NEWS
----

CHANGES IN VERSION 0.98.7
=========================

New features

* First column of output variableMetadata (that has feature names) now is always named "variableMetadata".
* First column of output sampleMetadata (that has sample names) now is always named "sampleMetadata".

Internal modifications

* Now uses w4mclassfilter R package v0.98.7.

CHANGES IN VERSION 0.98.6
=========================

New features

* Added support for filtering out features whose attributes fall outside specified ranges.
  For more detail, see "Variable-range filters" above.

Internal modifications

* Now uses w4mclassfilter R package v0.98.6.
* Now sorts sample names and feature names in output files because some statistical tools expect the same order in `dataMatrix` row and column names as in the corresponding metadata files.

Changes in version 0.98.3
=========================

Internal modifications

* Improved input handling.
* Now uses w4mclassfilter R package v0.98.3, although that version has no functional implications for this tool.
* Improved reference-list.

Changes in version 0.98.2
=========================

New features

* Added support for R-flavored regular expression pattern-matching when selecting names of sample-classes.
* An empty classes argument or a zero-length class_column results in no samples being filtered out.

Internal modifications

* Support and tests for new features.

Changes in version 0.98.1
=========================

First release - Wrap the w4mclassfilter R package that implements filtering of W4M data matrix, variable metadata, and sample metadata by class of sample.

New features

* *dataMatrix* *is* modified by the tool, so it *does* appear as an output file
* *sampleMetadata* *is* modified by the tool, so it *does* appear as an output file
* *variableMetadata* *is* modified by the tool, so it *does* appear as an output file

Internal modifications

* N/A

  ]]></help>
  <citations>
    <!-- References for this tool, each given as a DOI; the comment above each entry names the cited work -->
    <!-- Giacomoni_2014 W4M 2.5 -->
    <citation type="doi">10.1093/bioinformatics/btu813</citation>
    <!-- Guitton_2017 W4M 3.0 -->
    <citation type="doi">10.1016/j.biocel.2017.07.002</citation>
    <!-- Smith_2006 XCMS -->
    <citation type="doi">10.1021/ac051437y</citation>
    <!-- Thévenot_2015 Urinary metabolome statistics -->
    <citation type="doi">10.1021/acs.jproteome.5b00354</citation>
  </citations>
  <!--
     vim:noet:sw=2:ts=2
--> </tool>
author	eschen42
date	Mon, 29 Jan 2018 21:20:07 -0500
parents	38ccf6722d54
children	d5cf23369d12